diff --git a/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json b/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json new file mode 100644 index 000000000..13d42abff --- /dev/null +++ b/data/hfopenllm_v2/0-hero/Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1762652579.4626381", + "retrieved_timestamp": "1762652579.462642", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "0-hero/Matter-0.2-7B-DPO", + "developer": "0-hero", + "inference_platform": "unknown", + "id": "0-hero/Matter-0.2-7B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3302792147058693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3596254301656297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.381375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json new file mode 100644 index 000000000..80547e421 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1762652579.463656", + "retrieved_timestamp": "1762652579.463657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-34B-32K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-34B-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3118691737922047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6015685776542417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4398229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4709109042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json new file mode 100644 index 000000000..d0dd58d6e --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1762652579.464125", + "retrieved_timestamp": "1762652579.4641259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-34B-Chat-16K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-34B-Chat-16K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456449997118756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6100218256499571 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43976041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45445478723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json new file mode 100644 index 000000000..e872cb56c --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1762652579.463886", + "retrieved_timestamp": "1762652579.4638872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-34B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-34B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6066758423205982 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6083748310271819 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4281979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45204454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json b/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json new file mode 100644 index 000000000..09588b762 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1762652579.4633532", + "retrieved_timestamp": "1762652579.463354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-34B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2841172533322695 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5976391706360018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36577181208053694 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4236041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4665890957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json new file mode 100644 index 000000000..7d05d24e5 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1762652579.464571", + "retrieved_timestamp": "1762652579.464572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-6B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-6B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5145270105542183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4571311331954389 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1623867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43917708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3193151595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json b/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json new file mode 100644 index 000000000..802fda80e --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1762652579.464354", + "retrieved_timestamp": "1762652579.464355", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-6B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26166017278598563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44925820198929056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43740625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31441156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json new file mode 100644 index 000000000..9e8fb948c --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1762652579.4649951", + "retrieved_timestamp": "1762652579.464996", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-9B-32K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-9B-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23031113002389217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.496332115988265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37649601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json new file mode 100644 index 000000000..9f8095e13 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1762652579.465471", + "retrieved_timestamp": "1762652579.465471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-9B-Chat-16K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-9B-Chat-16K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4214040966856829 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5153383364651778 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40990624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39935172872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json new file mode 100644 index 000000000..ff3ac391b --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1762652579.465226", + "retrieved_timestamp": "1762652579.465226", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-9B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-9B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6045525871354672 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.555906430281685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42590625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39752327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json b/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json new file mode 100644 index 000000000..9a4e6bc7a --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1762652579.464781", + "retrieved_timestamp": "1762652579.464782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-1.5-9B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-1.5-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29358435617494916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.514294179104191 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43278124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3916223404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json b/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json new file mode 100644 index 000000000..5655666dd --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1762652579.465893", + "retrieved_timestamp": "1762652579.465894", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-34B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-34B-200K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15424850507763843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5441817925289527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171874999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45345744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json b/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json new file mode 100644 index 000000000..b31034cb0 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1762652579.466115", + "retrieved_timestamp": "1762652579.4661162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-34B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-34B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4698887839820565 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5560872910766164 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json b/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json new file mode 100644 index 000000000..b6ff74f37 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1762652579.4656792", + "retrieved_timestamp": "1762652579.46568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-34B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3045751938190667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5457099951794562 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4118541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.441156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json b/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json new file mode 100644 index 000000000..992afa075 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1762652579.4665558", + "retrieved_timestamp": "1762652579.466557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-6B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-6B-200K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08433068702154728 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42892948109603307 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45873958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2844082446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json b/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json new file mode 100644 index 000000000..791c74d16 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1762652579.466805", + "retrieved_timestamp": "1762652579.466806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-6B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-6B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33952135888331847 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41326019207548687 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36879166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3061003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json b/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json new file mode 100644 index 000000000..fc94c92f1 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1762652579.4663382", + "retrieved_timestamp": "1762652579.4663382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-6B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28933784580468713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309230591000865 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39368749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29911901595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json b/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json new file mode 100644 index 000000000..08d337e81 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1762652579.467233", + "retrieved_timestamp": "1762652579.467233", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-9B-200K", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-9B-200K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23270921155866434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4793302602023641 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42940625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36220079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json b/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json new file mode 100644 index 000000000..6bafb1fe4 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1762652579.46702", + "retrieved_timestamp": "1762652579.4670231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-9B", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2708779372066118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49396075125308075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40540624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35738031914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json b/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json new file mode 100644 index 000000000..ea42c85e8 --- /dev/null +++ b/data/hfopenllm_v2/01-ai/Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1762652579.4674509", + "retrieved_timestamp": "1762652579.4674518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "01-ai/Yi-Coder-9B-Chat", + "developer": "01-ai", + "inference_platform": "unknown", + "id": "01-ai/Yi-Coder-9B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4817041006750976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48142000339111674 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3991770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24251994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json new file mode 100644 index 000000000..6bc93d8c2 --- /dev/null +++ b/data/hfopenllm_v2/1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1762652579.468073", + "retrieved_timestamp": "1762652579.468074", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", + "developer": "1-800-LLMs", + "inference_platform": "unknown", + "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30774677854758703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6284322714967584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516373005319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json b/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json new file mode 100644 index 000000000..8b265339c --- /dev/null +++ b/data/hfopenllm_v2/152334H/miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/152334H_miqu-1-70b-sf/1762652579.469194", + "retrieved_timestamp": "1762652579.469195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "152334H/miqu-1-70b-sf", + "developer": "152334H", + "inference_platform": "unknown", + "id": "152334H/miqu-1-70b-sf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181740005407873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6102361685099691 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45820833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42278922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json new file mode 100644 index 000000000..4e3d74e80 --- /dev/null +++ b/data/hfopenllm_v2/1TuanPham/T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1762652579.469481", + "retrieved_timestamp": "1762652579.469482", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1TuanPham/T-VisStar-7B-v0.1", + "developer": "1TuanPham", + "inference_platform": "unknown", + "id": "1TuanPham/T-VisStar-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36070404305021786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052203113352468 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3210605053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.294 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json b/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json new file mode 100644 index 000000000..c479ea184 --- /dev/null +++ b/data/hfopenllm_v2/1TuanPham/T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1762652579.469921", + "retrieved_timestamp": "1762652579.469923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1TuanPham/T-VisStar-v0.1", + "developer": "1TuanPham", + "inference_platform": "unknown", + "id": "1TuanPham/T-VisStar-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36070404305021786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052203113352468 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3210605053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.294 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json b/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json new file mode 100644 index 000000000..d5171a37c --- /dev/null +++ b/data/hfopenllm_v2/3rd-Degree-Burn/L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1762652579.470164", + "retrieved_timestamp": "1762652579.470165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", + "developer": "3rd-Degree-Burn", + "inference_platform": "unknown", + "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42625012743963797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5041306326216103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36494348404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json b/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json new file mode 100644 index 000000000..864adc997 --- /dev/null +++ b/data/hfopenllm_v2/4season/final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1762652579.4714398", + "retrieved_timestamp": "1762652579.4714408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "4season/final_model_test_v2", + "developer": "4season", + "inference_platform": "unknown", + "id": "4season/final_model_test_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3191132860809319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6342049783295018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4314479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json new file mode 100644 index 000000000..aa5ed6ea3 --- /dev/null +++ b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1762652579.471838", + "retrieved_timestamp": "1762652579.471839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", + "developer": "AALF", + "inference_platform": "unknown", + "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7189579205397235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5119887898349903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38200000000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3732546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json new file mode 100644 index 000000000..832cd58cd --- /dev/null +++ b/data/hfopenllm_v2/AALF/FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1762652579.472149", + "retrieved_timestamp": "1762652579.47215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", + "developer": "AALF", + "inference_platform": "unknown", + "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7280504616639405 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5240303130445233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40199999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37433510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json b/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json new file mode 100644 index 000000000..1b54cc9ef --- /dev/null +++ b/data/hfopenllm_v2/AGI-0/Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1762652579.473539", + "retrieved_timestamp": "1762652579.47354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AGI-0/Art-v0-3B", + "developer": "AGI-0", + "inference_platform": "unknown", + "id": "AGI-0/Art-v0-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.319238509377341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3400959483013824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11785239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json new file mode 100644 index 000000000..cf8026494 --- /dev/null +++ b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1762652579.474318", + "retrieved_timestamp": "1762652579.4743192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI-MO/NuminaMath-7B-CoT", + "developer": "AI-MO", + "inference_platform": "unknown", + "id": "AI-MO/NuminaMath-7B-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2688544173903022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4314193495860012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26963746223564955 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33034375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28681848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.91 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json new file mode 100644 index 000000000..8f2466fce --- /dev/null +++ b/data/hfopenllm_v2/AI-MO/NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1762652579.474566", + "retrieved_timestamp": "1762652579.474567", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI-MO/NuminaMath-7B-TIR", + "developer": "AI-MO", + "inference_platform": "unknown", + "id": "AI-MO/NuminaMath-7B-TIR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27562423259174545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41436913375897894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1608761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35092708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2732712765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.91 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json b/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json new file mode 100644 index 000000000..b8946e8a6 --- /dev/null +++ b/data/hfopenllm_v2/AI-Sweden-Models/Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1762652579.474785", + "retrieved_timestamp": "1762652579.474786", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI-Sweden-Models/Llama-3-8B-instruct", + "developer": "AI-Sweden-Models", + "inference_platform": "unknown", + "id": "AI-Sweden-Models/Llama-3-8B-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24012841482821137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4173460154515302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47709375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25972406914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json b/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json new file mode 100644 index 000000000..833d8203f --- /dev/null +++ b/data/hfopenllm_v2/AI4free/Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1762652579.475332", + "retrieved_timestamp": "1762652579.475332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI4free/Dhanishtha", + "developer": "AI4free", + "inference_platform": "unknown", + "id": "AI4free/Dhanishtha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2451240486353985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34039444943326375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25604229607250756 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35694791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16431183510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json b/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json new file mode 100644 index 000000000..802924140 --- /dev/null +++ b/data/hfopenllm_v2/AI4free/t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI4free_t2/1762652579.475577", + "retrieved_timestamp": "1762652579.475578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI4free/t2", + "developer": "AI4free", + "inference_platform": "unknown", + "id": "AI4free/t2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3866828902866616 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2910111436321769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3846354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11436170212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json b/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json new file mode 100644 index 000000000..354dbfb93 --- /dev/null +++ b/data/hfopenllm_v2/AIDC-AI/Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1762652579.47579", + "retrieved_timestamp": "1762652579.4757912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AIDC-AI/Marco-o1", + "developer": "AIDC-AI", + "inference_platform": "unknown", + "id": "AIDC-AI/Marco-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.477083028586373 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364362696398749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37462235649546827 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41165226063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json b/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json new file mode 100644 index 000000000..60c612ebe --- /dev/null +++ b/data/hfopenllm_v2/Aashraf995/Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1762652579.476046", + "retrieved_timestamp": "1762652579.476046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aashraf995/Creative-7B-nerd", + "developer": "Aashraf995", + "inference_platform": "unknown", + "id": "Aashraf995/Creative-7B-nerd" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4721871301480073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5606785565640195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3164652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json b/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json new file mode 100644 index 000000000..c6cbac5b4 --- /dev/null +++ b/data/hfopenllm_v2/AbacusResearch/Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1762652579.477037", + "retrieved_timestamp": "1762652579.4770381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AbacusResearch/Jallabi-34B", + "developer": "AbacusResearch", + "inference_platform": "unknown", + "id": "AbacusResearch/Jallabi-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528604103777976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6023380603196266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48217708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681682180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json b/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json new file mode 100644 index 000000000..a56c30373 --- /dev/null +++ b/data/hfopenllm_v2/Ahdoot/StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1762652579.4772868", + "retrieved_timestamp": "1762652579.477288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure", + "developer": "Ahdoot", + "inference_platform": "unknown", + "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4192808415005519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48376906494893984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.290785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41582291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36103723404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json b/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json new file mode 100644 index 000000000..df4f9bab1 --- /dev/null +++ b/data/hfopenllm_v2/Ahdoot/Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1762652579.4775438", + "retrieved_timestamp": "1762652579.4775438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ahdoot/Test_StealthThinker", + "developer": "Ahdoot", + "inference_platform": "unknown", + "id": "Ahdoot/Test_StealthThinker" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42200361706937595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46466398134666304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17900302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42804166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35970744680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json new file mode 100644 index 000000000..3b8e9b378 --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1762652579.478028", + "retrieved_timestamp": "1762652579.478029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7097656440466851 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4477501104993749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34079166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json new file mode 100644 index 000000000..9eddb6a2c --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1762652579.4777558", + "retrieved_timestamp": "1762652579.477757", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6358018945287394 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4497434194912941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33136458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.301030585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json new file mode 100644 index 000000000..cfc591eef --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1762652579.478466", + "retrieved_timestamp": "1762652579.478467", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6730209178313542 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4391775517124728 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35409375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308843085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json new file mode 100644 index 000000000..6369ee477 --- /dev/null +++ b/data/hfopenllm_v2/AicoresSecurity/Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1/1762652579.478252", + "retrieved_timestamp": "1762652579.4782531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", + "developer": "AicoresSecurity", + "inference_platform": "unknown", + "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6145693426774292 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282342020189216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32869791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2876496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json b/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json new file mode 100644 index 000000000..a96d31649 --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M0/1762652579.4786859", + "retrieved_timestamp": "1762652579.478687", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alepach/notHumpback-M0", + "developer": "Alepach", + "inference_platform": "unknown", + "id": "Alepach/notHumpback-M0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23500755772461512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27849287879199425 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35523958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1118683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json b/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json new file mode 100644 index 000000000..02499a3dd --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1-v2/1762652579.4791439", + "retrieved_timestamp": "1762652579.479145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alepach/notHumpback-M1-v2", + "developer": "Alepach", + "inference_platform": "unknown", + "id": "Alepach/notHumpback-M1-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2277135777514772 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2775640398406834 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3473333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1118683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json b/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json new file mode 100644 index 000000000..87a8aec8e --- /dev/null +++ b/data/hfopenllm_v2/Alepach/notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1/1762652579.478936", + "retrieved_timestamp": "1762652579.4789371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alepach/notHumpback-M1", + "developer": "Alepach", + "inference_platform": "unknown", + "id": "Alepach/notHumpback-M1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2206944241279804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28824720129981835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10912566489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json b/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json new file mode 100644 index 000000000..a6ef71f97 --- /dev/null +++ b/data/hfopenllm_v2/Alibaba-NLP/gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Alibaba-NLP_gte-Qwen2-7B-instruct/1762652579.479603", + "retrieved_timestamp": "1762652579.479604", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alibaba-NLP/gte-Qwen2-7B-instruct", + "developer": "Alibaba-NLP", + "inference_platform": "unknown", + "id": "Alibaba-NLP/gte-Qwen2-7B-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22554045488193547 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4495144990818469 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35585416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33211436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json b/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json new file mode 100644 index 000000000..e702ed215 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI2006/1762652579.480136", + "retrieved_timestamp": "1762652579.4801369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amaorynho/BBAI2006", + "developer": "Amaorynho", + "inference_platform": "unknown", + "id": "Amaorynho/BBAI2006" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14670518668244703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2704366990167133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3605416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.09 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json b/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json new file mode 100644 index 000000000..454e22507 --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI270V4/1762652579.4803882", + "retrieved_timestamp": "1762652579.4803882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amaorynho/BBAI270V4", + "developer": "Amaorynho", + "inference_platform": "unknown", + "id": "Amaorynho/BBAI270V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1990374428737971 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30712046736502824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33139583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11136968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json b/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json new file mode 100644 index 000000000..356439e4d --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAIIFEV1/1762652579.480599", + "retrieved_timestamp": "1762652579.4806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amaorynho/BBAIIFEV1", + "developer": "Amaorynho", + "inference_platform": "unknown", + "id": "Amaorynho/BBAIIFEV1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8047369867507104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5292462038560509 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3857214095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json b/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json new file mode 100644 index 000000000..3e6868b2b --- /dev/null +++ b/data/hfopenllm_v2/Amaorynho/BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI_375/1762652579.480799", + "retrieved_timestamp": "1762652579.480799", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amaorynho/BBAI_375", + "developer": "Amaorynho", + "inference_platform": "unknown", + "id": "Amaorynho/BBAI_375" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14670518668244703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2704366990167133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3605416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.09 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json b/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json new file mode 100644 index 000000000..865361cee --- /dev/null +++ b/data/hfopenllm_v2/Amu/t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amu_t1-1.5B/1762652579.481014", + "retrieved_timestamp": "1762652579.481015", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amu/t1-1.5B", + "developer": "Amu", + "inference_platform": "unknown", + "id": "Amu/t1-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3393717558300864 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4007606984109216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3517083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2566489361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json b/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json new file mode 100644 index 000000000..a43460fea --- /dev/null +++ b/data/hfopenllm_v2/Amu/t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Amu_t1-3B/1762652579.481272", + "retrieved_timestamp": "1762652579.4812732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Amu/t1-3B", + "developer": "Amu", + "inference_platform": "unknown", + "id": "Amu/t1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33277703160946287 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39989750143834385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34348958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12840757978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json b/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json new file mode 100644 index 000000000..b2a410e0d --- /dev/null +++ b/data/hfopenllm_v2/ArliAI/ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ArliAI_ArliAI-RPMax-12B-v1.1/1762652579.481497", + "retrieved_timestamp": "1762652579.481498", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArliAI/ArliAI-RPMax-12B-v1.1", + "developer": "ArliAI", + "inference_platform": "unknown", + "id": "ArliAI/ArliAI-RPMax-12B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5348852156721942 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.475181760840119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3384308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json b/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json new file mode 100644 index 000000000..ab1b37a96 --- /dev/null +++ b/data/hfopenllm_v2/Arthur-LAGACHERIE/Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Arthur-LAGACHERIE_Precis-1B-Instruct/1762652579.482005", + "retrieved_timestamp": "1762652579.482006", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Arthur-LAGACHERIE/Precis-1B-Instruct", + "developer": "Arthur-LAGACHERIE", + "inference_platform": "unknown", + "id": "Arthur-LAGACHERIE/Precis-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3670738086056109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3223614510687368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34355208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14261968085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json b/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json new file mode 100644 index 000000000..9a6458ce2 --- /dev/null +++ b/data/hfopenllm_v2/Artples/L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Artples_L-MChat-7b/1762652579.482251", + "retrieved_timestamp": "1762652579.482251", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Artples/L-MChat-7b", + "developer": "Artples", + "inference_platform": "unknown", + "id": "Artples/L-MChat-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5296646231997766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46003301674679414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json b/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json new file mode 100644 index 000000000..0c48fa262 --- /dev/null +++ b/data/hfopenllm_v2/Artples/L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Artples_L-MChat-Small/1762652579.4824991", + "retrieved_timestamp": "1762652579.4825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Artples/L-MChat-Small", + "developer": "Artples", + "inference_platform": "unknown", + "id": "Artples/L-MChat-Small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32870561222002065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48225627665257265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36959375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24642619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json b/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json new file mode 100644 index 000000000..cb5805307 --- /dev/null +++ b/data/hfopenllm_v2/Aryanne/SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aryanne_SHBA/1762652579.482961", + "retrieved_timestamp": "1762652579.482962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aryanne/SHBA", + "developer": "Aryanne", + "inference_platform": "unknown", + "id": "Aryanne/SHBA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7816560060639104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5233174837035715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41613541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3892121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json b/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json new file mode 100644 index 000000000..9d8c9888d --- /dev/null +++ b/data/hfopenllm_v2/Aryanne/SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aryanne_SuperHeart/1762652579.483199", + "retrieved_timestamp": "1762652579.4832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aryanne/SuperHeart", + "developer": "Aryanne", + "inference_platform": "unknown", + "id": "Aryanne/SuperHeart" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5192234382549413 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215375046264326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44357291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3912067819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json b/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json new file mode 100644 index 000000000..600307e40 --- /dev/null +++ b/data/hfopenllm_v2/Ateron/Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ateron_Glowing-Forest-12B/1762652579.484101", + "retrieved_timestamp": "1762652579.4841018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ateron/Glowing-Forest-12B", + "developer": "Ateron", + "inference_platform": "unknown", + "id": "Ateron/Glowing-Forest-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3591803082487799 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.549176294722067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37175864361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json b/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json new file mode 100644 index 000000000..3c4642a4e --- /dev/null +++ b/data/hfopenllm_v2/Ateron/Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ateron_Lotus-Magpic/1762652579.484373", + "retrieved_timestamp": "1762652579.484374", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ateron/Lotus-Magpic", + "developer": "Ateron", + "inference_platform": "unknown", + "id": "Ateron/Lotus-Magpic" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6286076499244228 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5253514950133299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4331875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3490691489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json b/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json new file mode 100644 index 000000000..3d5e7b239 --- /dev/null +++ b/data/hfopenllm_v2/Ateron/Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ateron_Way_of_MagPicaro/1762652579.484595", + "retrieved_timestamp": "1762652579.484596", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ateron/Way_of_MagPicaro", + "developer": "Ateron", + "inference_platform": "unknown", + "id": "Ateron/Way_of_MagPicaro" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2637091805298829 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5427386861946704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35355718085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json b/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json new file mode 100644 index 000000000..7ad40e181 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-4B/1762652579.484812", + "retrieved_timestamp": "1762652579.484813", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AuraIndustries/Aura-4B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "id": "AuraIndustries/Aura-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38156203318306536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4490409465001946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27061170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json b/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json new file mode 100644 index 000000000..c67454836 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-8B/1762652579.485057", + "retrieved_timestamp": "1762652579.485057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AuraIndustries/Aura-8B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "id": "AuraIndustries/Aura-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7205315230255722 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5131231419849063 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4004479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38738364361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json new file mode 100644 index 000000000..5693b3193 --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B-v2/1762652579.4855082", + "retrieved_timestamp": "1762652579.4855092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AuraIndustries/Aura-MoE-2x4B-v2", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "id": "AuraIndustries/Aura-MoE-2x4B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4777822843388875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43152444292813597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4100625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 7.231 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json new file mode 100644 index 000000000..bb5cb66aa --- /dev/null +++ b/data/hfopenllm_v2/AuraIndustries/Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B/1762652579.48526", + "retrieved_timestamp": "1762652579.485261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AuraIndustries/Aura-MoE-2x4B", + "developer": "AuraIndustries", + "inference_platform": "unknown", + "id": "AuraIndustries/Aura-MoE-2x4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.460096987105325 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43385067041774666 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40851041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26496010638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 7.231 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json b/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json new file mode 100644 index 000000000..dbaa6ce3e --- /dev/null +++ b/data/hfopenllm_v2/Aurel9/testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aurel9_testmerge-7b/1762652579.485724", + "retrieved_timestamp": "1762652579.485725", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aurel9/testmerge-7b", + "developer": "Aurel9", + "inference_platform": "unknown", + "id": "Aurel9/testmerge-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3979984219648311 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5189590919105128 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4658645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3052692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json b/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json new file mode 100644 index 000000000..ec3fca4c4 --- /dev/null +++ b/data/hfopenllm_v2/Ayush-Singh/Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ayush-Singh_Llama1B-sft-2/1762652579.4859679", + "retrieved_timestamp": "1762652579.4859688", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ayush-Singh/Llama1B-sft-2", + "developer": "Ayush-Singh", + "inference_platform": "unknown", + "id": "Ayush-Singh/Llama1B-sft-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13743755457741016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.283428204214368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35520833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json b/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json new file mode 100644 index 000000000..f3857256d --- /dev/null +++ b/data/hfopenllm_v2/Azure99/Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-14B/1762652579.486225", + "retrieved_timestamp": "1762652579.4862258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/Blossom-V6-14B", + "developer": "Azure99", + "inference_platform": "unknown", + "id": "Azure99/Blossom-V6-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6395486198841297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5068726694646123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.525679758308157 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40352083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4543716755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json b/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json new file mode 100644 index 000000000..5a6876c4a --- /dev/null +++ b/data/hfopenllm_v2/Azure99/Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-7B/1762652579.486468", + "retrieved_timestamp": "1762652579.486469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/Blossom-V6-7B", + "developer": "Azure99", + "inference_platform": "unknown", + "id": "Azure99/Blossom-V6-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5538194213575536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49736683240887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45845921450151056 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43009375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41439494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json b/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json new file mode 100644 index 000000000..671fce54d --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-32b/1762652579.4866729", + "retrieved_timestamp": "1762652579.4866738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/blossom-v5-32b", + "developer": "Azure99", + "inference_platform": "unknown", + "id": "Azure99/blossom-v5-32b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235441960664371 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5954545257004673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40199999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4234541223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json new file mode 100644 index 000000000..53abe01ec --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-34b/1762652579.4871309", + "retrieved_timestamp": "1762652579.4871309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/blossom-v5.1-34b", + "developer": "Azure99", + "inference_platform": "unknown", + "id": "Azure99/blossom-v5.1-34b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5696562897556262 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6109110096611161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2590634441087613 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4557845744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json b/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json new file mode 100644 index 000000000..f61dcc05f --- /dev/null +++ b/data/hfopenllm_v2/Azure99/blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-9b/1762652579.487347", + "retrieved_timestamp": "1762652579.487348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/blossom-v5.1-9b", + "developer": "Azure99", + "inference_platform": "unknown", + "id": "Azure99/blossom-v5.1-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5085816744016985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343292377916368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39793882978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json new file mode 100644 index 000000000..d8fe59d83 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/1762652579.487831", + "retrieved_timestamp": "1762652579.487832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6821134589555713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6641614484348598 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45226041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47298869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json new file mode 100644 index 000000000..426cb5598 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/1762652579.48831", + "retrieved_timestamp": "1762652579.4883142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5319873491225504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49582333763258896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31607380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json new file mode 100644 index 000000000..b05cb69e6 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/1762652579.4887528", + "retrieved_timestamp": "1762652579.488755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7442120240960651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6670337872930245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46165625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4586103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json new file mode 100644 index 000000000..4afbfb415 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/1762652579.4890082", + "retrieved_timestamp": "1762652579.489009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6050268842227512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954985723563075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37120833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3252160904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json new file mode 100644 index 000000000..3d04b7f66 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/1762652579.489246", + "retrieved_timestamp": "1762652579.489247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5867420666054957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4939670574681802 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42723958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3229720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json new file mode 100644 index 000000000..367ecd177 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/1762652579.489471", + "retrieved_timestamp": "1762652579.489472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5553930238434022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5345911997776569 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38876041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39602726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json new file mode 100644 index 000000000..a42525cda --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/1762652579.489686", + "retrieved_timestamp": "1762652579.489687", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5185984299436606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5509115146247398 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41181848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json new file mode 100644 index 000000000..6ec710e81 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/1762652579.489912", + "retrieved_timestamp": "1762652579.489913", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6131952109292234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077335431381055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3223902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json new file mode 100644 index 000000000..1ee536947 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-mistral-7B/1762652579.490131", + "retrieved_timestamp": "1762652579.490131", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6161928128476886 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4963813586525743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4061875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3273769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json new file mode 100644 index 000000000..e41be6356 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/1762652579.490346", + "retrieved_timestamp": "1762652579.490347", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7335458804859993 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6695200461367471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25226586102719034 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45390625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.460688164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json new file mode 100644 index 000000000..91365b830 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/1762652579.4905548", + "retrieved_timestamp": "1762652579.490556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6131952109292234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077335431381055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3223902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json new file mode 100644 index 000000000..6346de938 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/1762652579.490771", + "retrieved_timestamp": "1762652579.490772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6146690780462506 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4963813586525743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4061875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3273769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json b/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json new file mode 100644 index 000000000..d92e1d660 --- /dev/null +++ b/data/hfopenllm_v2/BAAI/OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_OPI-Llama-3.1-8B-Instruct/1762652579.490996", + "retrieved_timestamp": "1762652579.490996", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/OPI-Llama-3.1-8B-Instruct", + "developer": "BAAI", + "inference_platform": "unknown", + "id": "BAAI/OPI-Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20745510800232272 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3551224419497605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3233020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21243351063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json new file mode 100644 index 000000000..71e72e2d2 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/1762652579.492853", + "retrieved_timestamp": "1762652579.492853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13206735905176042 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3137786304497592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43927083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12367021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json new file mode 100644 index 000000000..ad3cc2bd9 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan/1762652579.492592", + "retrieved_timestamp": "1762652579.492592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15057713533424646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30280434847620613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2332214765100671 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1307347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json new file mode 100644 index 000000000..374b0997c --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/1762652579.493063", + "retrieved_timestamp": "1762652579.493064", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1402855534426433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31345674638809023 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42069791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12367021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json new file mode 100644 index 000000000..48cdf2542 --- /dev/null +++ b/data/hfopenllm_v2/BEE-spoke-data/tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-instruct-orpo/1762652579.493278", + "retrieved_timestamp": "1762652579.493279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/tFINE-900m-instruct-orpo", + "developer": "BEE-spoke-data", + "inference_platform": "unknown", + "id": "BEE-spoke-data/tFINE-900m-instruct-orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13299157346950535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30220933767045094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3408541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11519281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.887 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json b/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json new file mode 100644 index 000000000..ebf36d902 --- /dev/null +++ b/data/hfopenllm_v2/BSC-LT/salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b-instruct/1762652579.493781", + "retrieved_timestamp": "1762652579.493781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BSC-LT/salamandra-7b-instruct", + "developer": "BSC-LT", + "inference_platform": "unknown", + "id": "BSC-LT/salamandra-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24507418095098782 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851324290080956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41343749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051861702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json b/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json new file mode 100644 index 000000000..2877eafae --- /dev/null +++ b/data/hfopenllm_v2/BSC-LT/salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b/1762652579.493503", + "retrieved_timestamp": "1762652579.493503", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BSC-LT/salamandra-7b", + "developer": "BSC-LT", + "inference_platform": "unknown", + "id": "BSC-LT/salamandra-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13673829882489574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3516612209885983 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35009375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14926861702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json b/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json new file mode 100644 index 000000000..a65778130 --- /dev/null +++ b/data/hfopenllm_v2/Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/1762652579.4943", + "retrieved_timestamp": "1762652579.4943008", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", + "developer": "Baptiste-HUVELLE-10", + "inference_platform": "unknown", + "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5076330802271307 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7256319952414622 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39932885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46255208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5851063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json b/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json new file mode 100644 index 000000000..369f9325e --- /dev/null +++ b/data/hfopenllm_v2/BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/1762652579.4945831", + "retrieved_timestamp": "1762652579.494584", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", + "developer": "BenevolenceMessiah", + "inference_platform": "unknown", + "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5473499204333391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.727311411382245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5785498489425982 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5628324468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json b/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json new file mode 100644 index 000000000..ae809373b --- /dev/null +++ b/data/hfopenllm_v2/BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/1762652579.4948769", + "retrieved_timestamp": "1762652579.494878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", + "developer": "BenevolenceMessiah", + "inference_platform": "unknown", + "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011531624977283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4908666248538678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4079791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26803523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 28.309 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json b/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json new file mode 100644 index 000000000..a7bc1d1e9 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Bloslain-8B-v0.2/1762652579.495104", + "retrieved_timestamp": "1762652579.495104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Bloslain-8B-v0.2", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "id": "BlackBeenie/Bloslain-8B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5023371321427147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.511087946253543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4075729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3653590425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json new file mode 100644 index 000000000..785931f94 --- /dev/null +++ b/data/hfopenllm_v2/BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/1762652579.495378", + "retrieved_timestamp": "1762652579.495378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", + "developer": "BlackBeenie", + "inference_platform": "unknown", + "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5124037553690873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4787448361604986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36181250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34915226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json b/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json new file mode 100644 index 000000000..96989649a --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BoltMonkey_DreadMix/1762652579.497959", + "retrieved_timestamp": "1762652579.497961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BoltMonkey/DreadMix", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "id": "BoltMonkey/DreadMix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7094908176970438 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435097438362475 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42121875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37898936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json new file mode 100644 index 000000000..fcdfc5295 --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498452", + "retrieved_timestamp": "1762652579.498454", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7998909559967553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5151987922850448 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37333776595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json new file mode 100644 index 000000000..5f5c2c0a0 --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498964", + "retrieved_timestamp": "1762652579.498965", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45902316963434797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5185441912447182 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4082604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3631150265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json b/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json new file mode 100644 index 000000000..f3ce3a86e --- /dev/null +++ b/data/hfopenllm_v2/BoltMonkey/SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BoltMonkey_SuperNeuralDreadDevil-8b/1762652579.499188", + "retrieved_timestamp": "1762652579.499189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BoltMonkey/SuperNeuralDreadDevil-8b", + "developer": "BoltMonkey", + "inference_platform": "unknown", + "id": "BoltMonkey/SuperNeuralDreadDevil-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7709898624538447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286196012035721 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39768749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36785239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json b/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json new file mode 100644 index 000000000..32afe6b22 --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BramVanroy_GEITje-7B-ultra/1762652579.499682", + "retrieved_timestamp": "1762652579.4996831", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BramVanroy/GEITje-7B-ultra", + "developer": "BramVanroy", + "inference_platform": "unknown", + "id": "BramVanroy/GEITje-7B-ultra" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723442687624392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37761612997305494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32897916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20113031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json b/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json new file mode 100644 index 000000000..755acdf50 --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-chat/1762652579.500146", + "retrieved_timestamp": "1762652579.5001469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BramVanroy/fietje-2-chat", + "developer": "BramVanroy", + "inference_platform": "unknown", + "id": "BramVanroy/fietje-2-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2917359273394593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149753717401999 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20545212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json b/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json new file mode 100644 index 000000000..e42d671b5 --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-instruct/1762652579.500353", + "retrieved_timestamp": "1762652579.500354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BramVanroy/fietje-2-instruct", + "developer": "BramVanroy", + "inference_platform": "unknown", + "id": "BramVanroy/fietje-2-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2789963962286732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41360714173029806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2332214765100671 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3369166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2103557180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json b/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json new file mode 100644 index 000000000..5fbf7cf1a --- /dev/null +++ b/data/hfopenllm_v2/BramVanroy/fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2/1762652579.499938", + "retrieved_timestamp": "1762652579.499939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BramVanroy/fietje-2", + "developer": "BramVanroy", + "inference_platform": "unknown", + "id": "BramVanroy/fietje-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20980332185268422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40356695178386187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19855385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json new file mode 100644 index 000000000..eac0c8b8c --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-chat/1762652579.5008068", + "retrieved_timestamp": "1762652579.500808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/Llama-PLLuM-8B-chat", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3514862786295917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40770722535589576 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41991666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27194148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json new file mode 100644 index 000000000..57b80877c --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-base/1762652579.501051", + "retrieved_timestamp": "1762652579.501052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/PLLuM-12B-base", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/PLLuM-12B-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2820937335159599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4390596143784447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4142395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2740192819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json new file mode 100644 index 000000000..f5de95952 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-chat/1762652579.501271", + "retrieved_timestamp": "1762652579.501272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/PLLuM-12B-chat", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/PLLuM-12B-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32143601200370575 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44458000333075703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4114791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2872340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json new file mode 100644 index 000000000..9590ea3f8 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-base/1762652579.501493", + "retrieved_timestamp": "1762652579.501494", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/PLLuM-12B-nc-base", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/PLLuM-12B-nc-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24045310886226323 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42767589675970014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36451041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25590093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json new file mode 100644 index 000000000..5b55bc1d9 --- /dev/null +++ b/data/hfopenllm_v2/CYFRAGOVPL/PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-chat/1762652579.501705", + "retrieved_timestamp": "1762652579.501706", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "developer": "CYFRAGOVPL", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/PLLuM-12B-nc-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28344237733657807 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45764328318815456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4353541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25972406914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json new file mode 100644 index 000000000..223ac119e --- /dev/null +++ b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/1762652579.5021691", + "retrieved_timestamp": "1762652579.50217", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", + "developer": "CarrotAI", + "inference_platform": "unknown", + "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47818233398493776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43577246498246686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31341422872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json new file mode 100644 index 000000000..08745a2a0 --- /dev/null +++ b/data/hfopenllm_v2/CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/1762652579.501917", + "retrieved_timestamp": "1762652579.5019178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", + "developer": "CarrotAI", + "inference_platform": "unknown", + "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7198821349574684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4426719080820793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2822473404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json b/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json new file mode 100644 index 000000000..c65d28fdd --- /dev/null +++ b/data/hfopenllm_v2/Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/1762652579.502389", + "retrieved_timestamp": "1762652579.502389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", + "developer": "Casual-Autopsy", + "inference_platform": "unknown", + "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7122634609502786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262406145493724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723404255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json b/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json new file mode 100644 index 000000000..9edbf818f --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/14B/c4376867-854d-44fa-9215-b9c1af7612a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CausalLM_14B/1762652579.502646", + "retrieved_timestamp": "1762652579.502647", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CausalLM/14B", + "developer": "CausalLM", + "inference_platform": "unknown", + "id": "CausalLM/14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2788213052478535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4700462397700626 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4154791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json b/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json new file mode 100644 index 000000000..d67cbc790 --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CausalLM_34b-beta/1762652579.502916", + "retrieved_timestamp": "1762652579.502916", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CausalLM/34b-beta", + "developer": "CausalLM", + "inference_platform": "unknown", + "id": "CausalLM/34b-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3043247472262486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5590996102136266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37486458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5324966755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json b/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json new file mode 100644 index 000000000..744daa3ad --- /dev/null +++ b/data/hfopenllm_v2/CausalLM/preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CausalLM_preview-1-hf/1762652579.503128", + "retrieved_timestamp": "1762652579.503129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CausalLM/preview-1-hf", + "developer": "CausalLM", + "inference_platform": "unknown", + "id": "CausalLM/preview-1-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5558928088582737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3614567463880903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34218750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35970744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json b/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json new file mode 100644 index 000000000..01ffe0289 --- /dev/null +++ b/data/hfopenllm_v2/Changgil/K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Changgil_K2S3-14b-v0.2/1762652579.503338", + "retrieved_timestamp": "1762652579.503339", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Changgil/K2S3-14b-v0.2", + "developer": "Changgil", + "inference_platform": "unknown", + "id": "Changgil/K2S3-14b-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3242840108689389 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4613311786298187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3922604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2643783244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 14.352 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json b/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json new file mode 100644 index 000000000..885ce58b9 --- /dev/null +++ b/data/hfopenllm_v2/Changgil/K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Changgil_K2S3-v0.1/1762652579.503593", + "retrieved_timestamp": "1762652579.503594", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Changgil/K2S3-v0.1", + "developer": "Changgil", + "inference_platform": "unknown", + "id": "Changgil/K2S3-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32765617450586665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46554920672286154 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40140624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2562333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 14.352 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json b/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json new file mode 100644 index 000000000..77561cff4 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Albacus/1762652579.503804", + "retrieved_timestamp": "1762652579.503805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ClaudioItaly/Albacus", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "id": "ClaudioItaly/Albacus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4667415790103592 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5113043406568835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41353124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31648936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json b/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json new file mode 100644 index 000000000..2c1e32532 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Book-Gut12B/1762652579.504094", + "retrieved_timestamp": "1762652579.504095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ClaudioItaly/Book-Gut12B", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "id": "ClaudioItaly/Book-Gut12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39984685080032095 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417370194443233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4635416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3670212765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json b/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json new file mode 100644 index 000000000..333f59d96 --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_Evolutionstory-7B-v2.2/1762652579.504309", + "retrieved_timestamp": "1762652579.504309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ClaudioItaly/Evolutionstory-7B-v2.2", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "id": "ClaudioItaly/Evolutionstory-7B-v2.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4813794066410457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5108043406568835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41353124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31590757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json b/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json new file mode 100644 index 000000000..3e0fdf20a --- /dev/null +++ b/data/hfopenllm_v2/ClaudioItaly/intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ClaudioItaly_intelligence-cod-rag-7b-v3/1762652579.504531", + "retrieved_timestamp": "1762652579.504531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ClaudioItaly/intelligence-cod-rag-7b-v3", + "developer": "ClaudioItaly", + "inference_platform": "unknown", + "id": "ClaudioItaly/intelligence-cod-rag-7b-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6897820006471718 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366339718839108 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806646525679758 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4152708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4195478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json b/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json new file mode 100644 index 000000000..31739a37e --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-35B/1762652579.5047522", + "retrieved_timestamp": "1762652579.5047529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/aya-23-35B", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/aya-23-35B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6461932117891638 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399551450731271 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33560505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 34.981 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json b/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json new file mode 100644 index 000000000..ae8daf4a1 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-8B/1762652579.5050838", + "retrieved_timestamp": "1762652579.505085", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/aya-23-8B", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/aya-23-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4698887839820565 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4296161519220307 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3940625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2278091755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json new file mode 100644 index 000000000..df351f051 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-32b/1762652579.505483", + "retrieved_timestamp": "1762652579.505484", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/aya-expanse-32b", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/aya-expanse-32b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7301737168490716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5648670099212114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41298204787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 32.296 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json b/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json new file mode 100644 index 000000000..b407718e1 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-8b/1762652579.505729", + "retrieved_timestamp": "1762652579.5057302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/aya-expanse-8b", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/aya-expanse-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6358517622131501 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4977203055736406 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37288541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3003656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json new file mode 100644 index 000000000..542a30489 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus-08-2024/1762652579.506166", + "retrieved_timestamp": "1762652579.506167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/c4ai-command-r-plus-08-2024", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/c4ai-command-r-plus-08-2024" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7539539532883859 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5995999913027185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48294791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44207114361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 103.811 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json new file mode 100644 index 000000000..833b34d48 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus/1762652579.50595", + "retrieved_timestamp": "1762652579.505951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/c4ai-command-r-plus", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/c4ai-command-r-plus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7664186580495308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.581542357407793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48071875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3991855053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 103.811 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json new file mode 100644 index 000000000..3d141a9df --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-v01/1762652579.506387", + "retrieved_timestamp": "1762652579.506388", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/c4ai-command-r-v01", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/c4ai-command-r-v01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6748194789824333 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406415512767856 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45169791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3369348404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "CohereForCausalLM", + "params_billions": 34.981 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json b/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json new file mode 100644 index 000000000..5029fa439 --- /dev/null +++ b/data/hfopenllm_v2/CohereForAI/c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r7b-12-2024/1762652579.5066051", + "retrieved_timestamp": "1762652579.506606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CohereForAI/c4ai-command-r7b-12-2024", + "developer": "CohereForAI", + "inference_platform": "unknown", + "id": "CohereForAI/c4ai-command-r7b-12-2024" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7713145564878965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5502642151855635 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990936555891239 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41251041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3572140957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Cohere2ForCausalLM", + "params_billions": 8.028 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json new file mode 100644 index 000000000..cb479a782 --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.506829", + "retrieved_timestamp": "1762652579.50683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3278312654866864 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39199563613207467 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41201041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16655585106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json new file mode 100644 index 000000000..b3988b3d8 --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.507083", + "retrieved_timestamp": "1762652579.507083", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3102457036219453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38810309159554507 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4080729166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16647273936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json new file mode 100644 index 000000000..ddfa65d1e --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-sft-v1.0/1762652579.507553", + "retrieved_timestamp": "1762652579.507553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3692469314751526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.387877927616119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17819148936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json new file mode 100644 index 000000000..c8e5a7626 --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/1762652579.5077918", + "retrieved_timestamp": "1762652579.507793", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4957424079220912 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5028481044452986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40971874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3218916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json new file mode 100644 index 000000000..bd254e8cf --- /dev/null +++ b/data/hfopenllm_v2/Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/1762652579.5082712", + "retrieved_timestamp": "1762652579.5082722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", + "developer": "Columbia-NLP", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171163623629745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5087766443418147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45027083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32372007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json b/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json new file mode 100644 index 000000000..5fdc46d78 --- /dev/null +++ b/data/hfopenllm_v2/CombinHorizon/YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_YiSM-blossom5.1-34B-SLERP/1762652579.508977", + "retrieved_timestamp": "1762652579.508977", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", + "developer": "CombinHorizon", + "inference_platform": "unknown", + "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033112142448702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6207548093635428 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44134375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4740691489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json new file mode 100644 index 000000000..056d276ca --- /dev/null +++ b/data/hfopenllm_v2/ContactDoctor/Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-3B-CoT-012025/1762652579.509939", + "retrieved_timestamp": "1762652579.509939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ContactDoctor/Bio-Medical-3B-CoT-012025", + "developer": "ContactDoctor", + "inference_platform": "unknown", + "id": "ContactDoctor/Bio-Medical-3B-CoT-012025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.360379349016166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438315337642466 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2933843085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json b/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json new file mode 100644 index 000000000..49f57e5e2 --- /dev/null +++ b/data/hfopenllm_v2/Corianas/Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Corianas_Quokka_2.7b/1762652579.5120142", + "retrieved_timestamp": "1762652579.512015", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Corianas/Quokka_2.7b", + "developer": "Corianas", + "inference_platform": "unknown", + "id": "Corianas/Quokka_2.7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17490702447284318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3055474937424842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3908333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11452792553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 2.786 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json b/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json new file mode 100644 index 000000000..5be4a7ab9 --- /dev/null +++ b/data/hfopenllm_v2/CortexLM/btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CortexLM_btlm-7b-base-v0.2/1762652579.512528", + "retrieved_timestamp": "1762652579.512528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CortexLM/btlm-7b-base-v0.2", + "developer": "CortexLM", + "inference_platform": "unknown", + "id": "CortexLM/btlm-7b-base-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14832865685270635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4006411985841813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38460416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2349567819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.885 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json b/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json new file mode 100644 index 000000000..81adea72e --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_SCE-2-24B/1762652579.512776", + "retrieved_timestamp": "1762652579.5127769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/SCE-2-24B", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/SCE-2-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5865924635522636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6264692798019763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.461186835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json b/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json new file mode 100644 index 000000000..395ba6542 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_SCE-3-24B/1762652579.513022", + "retrieved_timestamp": "1762652579.513023", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/SCE-3-24B", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/SCE-3-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465254413844156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.597283045074691 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18806646525679757 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44347916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4646775265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json b/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json new file mode 100644 index 000000000..fba7e4e44 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_T.E-8.1/1762652579.513231", + "retrieved_timestamp": "1762652579.513231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/T.E-8.1", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/T.E-8.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7076922565459647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5581754708123893 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4505208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4432347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json new file mode 100644 index 000000000..cad42a0b7 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_2/1762652579.51357", + "retrieved_timestamp": "1762652579.5135732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/merge_model_20250308_2", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/merge_model_20250308_2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5932370554572978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6585311075974459 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4793541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5419714095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json new file mode 100644 index 000000000..83ef5d528 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_3/1762652579.513911", + "retrieved_timestamp": "1762652579.513912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/merge_model_20250308_3", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/merge_model_20250308_3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6017799438822324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6271459892225041 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43204166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49617686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json b/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json new file mode 100644 index 000000000..1b6f65874 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_4/1762652579.514166", + "retrieved_timestamp": "1762652579.514167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/merge_model_20250308_4", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/merge_model_20250308_4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4539521802151624 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.666435217186487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4199395770392749 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4688125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366522606382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json b/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json new file mode 100644 index 000000000..a0a8a2f33 --- /dev/null +++ b/data/hfopenllm_v2/Cran-May/tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Cran-May_tempmotacilla-cinerea-0308/1762652579.514418", + "retrieved_timestamp": "1762652579.5144188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Cran-May/tempmotacilla-cinerea-0308", + "developer": "Cran-May", + "inference_platform": "unknown", + "id": "Cran-May/tempmotacilla-cinerea-0308" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8084837121061007 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6550960569488126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5551359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42082291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250166223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json new file mode 100644 index 000000000..8e4e919a1 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SFT/1762652579.520046", + "retrieved_timestamp": "1762652579.5200472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Wernicke-SFT", + "developer": "CultriX", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Wernicke-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4937443760333692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6460586236565512 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38999999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json b/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json new file mode 100644 index 000000000..316cecff4 --- /dev/null +++ b/data/hfopenllm_v2/CultriX/Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwestion-14B/1762652579.521322", + "retrieved_timestamp": "1762652579.521322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwestion-14B", + "developer": "CultriX", + "inference_platform": "unknown", + "id": "CultriX/Qwestion-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6317803428237078 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6450104739140539 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723564954682779 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46360416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.542220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json b/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json new file mode 100644 index 000000000..0f88fcd87 --- /dev/null +++ b/data/hfopenllm_v2/DRXD1000/Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DRXD1000_Atlas-7B/1762652579.5232708", + "retrieved_timestamp": "1762652579.523272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DRXD1000/Atlas-7B", + "developer": "DRXD1000", + "inference_platform": "unknown", + "id": "DRXD1000/Atlas-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3704459722425387 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3302176697760134 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14012632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.768 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json b/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json new file mode 100644 index 000000000..d99e78a45 --- /dev/null +++ b/data/hfopenllm_v2/DRXD1000/Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DRXD1000_Phoenix-7B/1762652579.5236301", + "retrieved_timestamp": "1762652579.523632", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DRXD1000/Phoenix-7B", + "developer": "DRXD1000", + "inference_platform": "unknown", + "id": "DRXD1000/Phoenix-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3209617149164218 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3931566034728218 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38494791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23429188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json b/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json new file mode 100644 index 000000000..57113a637 --- /dev/null +++ b/data/hfopenllm_v2/DUAL-GPO/zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/1762652579.523929", + "retrieved_timestamp": "1762652579.52393", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", + "developer": "DUAL-GPO", + "inference_platform": "unknown", + "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27562423259174545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4472712447565954 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41734374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31299867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json b/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json new file mode 100644 index 000000000..100874c4a --- /dev/null +++ b/data/hfopenllm_v2/DZgas/GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DZgas_GIGABATEMAN-7B/1762652579.524226", + "retrieved_timestamp": "1762652579.5242271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DZgas/GIGABATEMAN-7B", + "developer": "DZgas", + "inference_platform": "unknown", + "id": "DZgas/GIGABATEMAN-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46074637517342876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5032184342862756 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43284374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3176529255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json b/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json new file mode 100644 index 000000000..89499bf6d --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherDrake-SFT/1762652579.524555", + "retrieved_timestamp": "1762652579.524556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/AetherDrake-SFT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/AetherDrake-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4812796712722244 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48720075507220245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1510574018126888 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40884375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34990026595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json b/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json new file mode 100644 index 000000000..4bb4a8556 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherSett/1762652579.524883", + "retrieved_timestamp": "1762652579.524884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/AetherSett", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/AetherSett" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369586031729146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5451624435465484 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46031249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4278590425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json new file mode 100644 index 000000000..f5bc708c8 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5251331", + "retrieved_timestamp": "1762652579.5251389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/AetherTOT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/AetherTOT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4397642699149368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5066056342472064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4078541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38040226063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json b/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json new file mode 100644 index 000000000..1d63e8c1c --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5253801", + "retrieved_timestamp": "1762652579.525381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/AetherTOT", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/AetherTOT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43829040279790954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5034307630533988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14425981873111782 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40518750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37782579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json b/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json new file mode 100644 index 000000000..39f20c89e --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_AetherUncensored/1762652579.525634", + "retrieved_timestamp": "1762652579.5256362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/AetherUncensored", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/AetherUncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40419309653940433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44631282805144945 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3746770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27102726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json b/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json new file mode 100644 index 000000000..a56880a09 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Cogito-MIS/1762652579.525943", + "retrieved_timestamp": "1762652579.5259452", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Cogito-MIS", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/Cogito-MIS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18145188100905596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5059981143086196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37676041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14353390957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json b/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json new file mode 100644 index 000000000..9cf24eb56 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoDistil/1762652579.526295", + "retrieved_timestamp": "1762652579.5262961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/CogitoDistil", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/CogitoDistil" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27764775240805506 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36767660461416857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39274924471299094 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3754895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625498670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json b/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json new file mode 100644 index 000000000..08c0282b1 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ/1762652579.5265448", + "retrieved_timestamp": "1762652579.526546", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/CogitoZ", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/CogitoZ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3967240255854466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6734487392645502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241691842900302 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4792604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5592586436170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json b/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json new file mode 100644 index 000000000..c7e763d47 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ14/1762652579.526777", + "retrieved_timestamp": "1762652579.5267782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/CogitoZ14", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/CogitoZ14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6637034180419066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6297514788808327 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39993351063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json new file mode 100644 index 000000000..44b579dd5 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.5270069", + "retrieved_timestamp": "1762652579.527008", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/DocumentCogito", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/DocumentCogito" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064340394597445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5111563719111275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3973125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38023603723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json b/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json new file mode 100644 index 000000000..8aeeac07f --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.527227", + "retrieved_timestamp": "1762652579.5272279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/DocumentCogito", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/DocumentCogito" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7770349339751859 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5186726621665779 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39105208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3737533244680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json b/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json new file mode 100644 index 000000000..da1bc48b8 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_MawaredT1/1762652579.527918", + "retrieved_timestamp": "1762652579.527919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/MawaredT1", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/MawaredT1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41988036188424493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214815439293661 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3021148036253776 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47020833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4718251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json b/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json new file mode 100644 index 000000000..8aebd6435 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Mini_QwQ/1762652579.528199", + "retrieved_timestamp": "1762652579.5282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Mini_QwQ", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/Mini_QwQ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44970566984490046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554898906584336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41918429003021146 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.437250664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json b/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json new file mode 100644 index 000000000..c6d74e255 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_NemoR/1762652579.528459", + "retrieved_timestamp": "1762652579.528459", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/NemoR", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/NemoR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2287375275380435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194067688446361 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39080208333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32903922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json b/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json new file mode 100644 index 000000000..7669e28dc --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAI2.0/1762652579.528686", + "retrieved_timestamp": "1762652579.528686", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/PathFinderAI2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/PathFinderAI2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45410178326839457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.665823006477417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5075528700906344 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5546875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json b/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json new file mode 100644 index 000000000..a6c8bdb24 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAi3.0/1762652579.5289202", + "retrieved_timestamp": "1762652579.5289202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/PathFinderAi3.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/PathFinderAi3.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42709898624538445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6884221416328996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4085570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4806875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5757147606382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json new file mode 100644 index 000000000..9badb0279 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.529176", + "retrieved_timestamp": "1762652579.5291772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/PathfinderAI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37451739163198094 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6667854331232542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47583081570996977 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48583333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.559341755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json b/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json new file mode 100644 index 000000000..ac333a09b --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.5294342", + "retrieved_timestamp": "1762652579.529435", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/PathfinderAI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4855006937148987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6627335380624046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42559375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json b/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json new file mode 100644 index 000000000..aa668632a --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_PixelParse_AI/1762652579.529871", + "retrieved_timestamp": "1762652579.529872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/PixelParse_AI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/PixelParse_AI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43829040279790954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5034307630533988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40518750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37782579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json b/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json new file mode 100644 index 000000000..51186af72 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA2.0/1762652579.53008", + "retrieved_timestamp": "1762652579.530081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/RA2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/RA2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37838934028378035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4888687006782508 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38368580060422963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40912499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26163563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json new file mode 100644 index 000000000..b1f4c3ee0 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner/1762652579.530283", + "retrieved_timestamp": "1762652579.530284", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/RA_Reasoner", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/RA_Reasoner" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.559215104810791 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6053692417205033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3963541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43001994680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json b/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json new file mode 100644 index 000000000..e5a576999 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner2.0/1762652579.530484", + "retrieved_timestamp": "1762652579.530485", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/RA_Reasoner2.0", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/RA_Reasoner2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366339091388627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6062469551969276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3883541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4353390957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json b/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json new file mode 100644 index 000000000..2eae2e44d --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_ReasonTest/1762652579.530685", + "retrieved_timestamp": "1762652579.530686", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/ReasonTest", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/ReasonTest" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4079653098223824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543526397621609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43154166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4271941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.808 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json b/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json new file mode 100644 index 000000000..57dbc59cb --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Research_PathfinderAI/1762652579.530894", + "retrieved_timestamp": "1762652579.530895", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Research_PathfinderAI", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/Research_PathfinderAI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3456916537010687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287225755504323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11303191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json b/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json new file mode 100644 index 000000000..6abcd7894 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel-7B-Math/1762652579.531958", + "retrieved_timestamp": "1762652579.531959", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Zirel-7B-Math", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/Zirel-7B-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6638785090227264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5447698777469486 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47891666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4237034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json b/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json new file mode 100644 index 000000000..44e7be9b8 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel_1.5/1762652579.532257", + "retrieved_timestamp": "1762652579.532258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Zirel_1.5", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/Zirel_1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4167575366693706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3984669254999634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36581250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21434507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json b/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json new file mode 100644 index 000000000..c050631b8 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_mini-Cogito-R1/1762652579.532486", + "retrieved_timestamp": "1762652579.532487", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/mini-Cogito-R1", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/mini-Cogito-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2298368329366082 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3280491875175077 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27492447129909364 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34469791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14818816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json b/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json new file mode 100644 index 000000000..6dbd700d9 --- /dev/null +++ b/data/hfopenllm_v2/Daemontatox/mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_mini_Pathfinder/1762652579.53272", + "retrieved_timestamp": "1762652579.5327208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/mini_Pathfinder", + "developer": "Daemontatox", + "inference_platform": "unknown", + "id": "Daemontatox/mini_Pathfinder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29615752869054107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39556911910803755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37809374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28091755319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json b/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json new file mode 100644 index 000000000..4fdc019de --- /dev/null +++ b/data/hfopenllm_v2/Dampfinchen/Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/1762652579.532935", + "retrieved_timestamp": "1762652579.532935", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", + "developer": "Dampfinchen", + "inference_platform": "unknown", + "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8081091503876381 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5257532452246574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40032291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.382563164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json new file mode 100644 index 000000000..07da5b577 --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-10b/1762652579.533203", + "retrieved_timestamp": "1762652579.533203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-10b", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-10b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6607811717354397 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6120828494270083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4360871010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json b/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json new file mode 100644 index 000000000..da2f614a9 --- /dev/null +++ b/data/hfopenllm_v2/Danielbrdz/Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-3b-GRPO/1762652579.534181", + "retrieved_timestamp": "1762652579.5341818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-3b-GRPO", + "developer": "Danielbrdz", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-3b-GRPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5444276741268723 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44143515175110304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35759375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036901595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json new file mode 100644 index 000000000..6074f9aa8 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/1762652579.534956", + "retrieved_timestamp": "1762652579.5349572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3710953603106424 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48070333147041405 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2507480053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json new file mode 100644 index 000000000..4348b9452 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/1762652579.535208", + "retrieved_timestamp": "1762652579.535209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052593784491815 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48388753289945696 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4167604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2515791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json new file mode 100644 index 000000000..1582ce7e8 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/1762652579.535429", + "retrieved_timestamp": "1762652579.53543", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21110209798889168 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4791864789096407 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2805019946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json new file mode 100644 index 000000000..f43831e00 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/1762652579.5358772", + "retrieved_timestamp": "1762652579.535878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06682048076880455 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47747656219777285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3785833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328374335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json new file mode 100644 index 000000000..89fc7ebcd --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/1762652579.5360918", + "retrieved_timestamp": "1762652579.5360918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09105063453857985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4748653313732898 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3824895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.327875664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json new file mode 100644 index 000000000..046914817 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/1762652579.536302", + "retrieved_timestamp": "1762652579.536303", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064085515321569 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4624263551503409 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2999501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json new file mode 100644 index 000000000..e36943275 --- /dev/null +++ b/data/hfopenllm_v2/Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/1762652579.5356538", + "retrieved_timestamp": "1762652579.535655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", + "developer": "Dans-DiscountModels", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08250774611364513 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4738171816307924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3918229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32878989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json b/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json new file mode 100644 index 000000000..973da37f6 --- /dev/null +++ b/data/hfopenllm_v2/Darkknight535/OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Darkknight535_OpenCrystal-12B-L3/1762652579.5369642", + "retrieved_timestamp": "1762652579.5369651", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Darkknight535/OpenCrystal-12B-L3", + "developer": "Darkknight535", + "inference_platform": "unknown", + "id": "Darkknight535/OpenCrystal-12B-L3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070909630890482 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5222598504945516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640292553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json b/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json new file mode 100644 index 000000000..b3f9b0168 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-DARKEST-PLANET-16.5B/1762652579.540939", + "retrieved_timestamp": "1762652579.54094", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-DARKEST-PLANET-16.5B", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-DARKEST-PLANET-16.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6230623634179533 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5230436906708896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.363031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json b/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json new file mode 100644 index 000000000..774dc4010 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Dark-Planet-8B/1762652579.5412621", + "retrieved_timestamp": "1762652579.541263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-Dark-Planet-8B", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-Dark-Planet-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4134108609600305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084081453197787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36159375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37367021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json b/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json new file mode 100644 index 000000000..73c09edee --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/1762652579.541475", + "retrieved_timestamp": "1762652579.541475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3961998608137519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4765717717789398 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3291223404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json b/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json new file mode 100644 index 000000000..d1deb236f --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/1762652579.541698", + "retrieved_timestamp": "1762652579.5416992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3924032677739509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46930207579694677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31416223404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json b/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json new file mode 100644 index 000000000..3f9b310d1 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-SMB-Instruct-12.2B-F32/1762652579.541919", + "retrieved_timestamp": "1762652579.54192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-SMB-Instruct-12.2B-F32", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-SMB-Instruct-12.2B-F32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303215468290802 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4786412360346213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40872916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3312001329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json new file mode 100644 index 000000000..bc0f245cc --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/1762652579.542142", + "retrieved_timestamp": "1762652579.5421429", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34389309254998957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4736328900737677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40311458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3570478723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json b/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json new file mode 100644 index 000000000..51769c270 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/1762652579.542359", + "retrieved_timestamp": "1762652579.54236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027945850343755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4845980190500647 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3345246010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 12.174 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json b/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json new file mode 100644 index 000000000..62dccdf4b --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/1762652579.542578", + "retrieved_timestamp": "1762652579.542578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7042702252246262 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5260910165037093 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.354125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3670212765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json b/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json new file mode 100644 index 000000000..8c60a12c1 --- /dev/null +++ b/data/hfopenllm_v2/DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/1762652579.542795", + "retrieved_timestamp": "1762652579.5427961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", + "developer": "DavidAU", + "inference_platform": "unknown", + "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3345257250761313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4420822344441435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26057401812688824 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37486458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2892287234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json b/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json new file mode 100644 index 000000000..d9d0aaaa6 --- /dev/null +++ b/data/hfopenllm_v2/Davidsv/SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Davidsv_SUONG-1/1762652579.5439382", + "retrieved_timestamp": "1762652579.54394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Davidsv/SUONG-1", + "developer": "Davidsv", + "inference_platform": "unknown", + "id": "Davidsv/SUONG-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2497207409673001 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28171339082318814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1085438829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 2.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json b/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json new file mode 100644 index 000000000..f49a90f71 --- /dev/null +++ b/data/hfopenllm_v2/DavieLion/Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Lllma-3.2-1B/1762652579.5458188", + "retrieved_timestamp": "1762652579.54582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Lllma-3.2-1B", + "developer": "DavieLion", + "inference_platform": "unknown", + "id": "DavieLion/Lllma-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1601439735457475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2964692268500723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35781250000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json b/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json new file mode 100644 index 000000000..b2b91ce5c --- /dev/null +++ b/data/hfopenllm_v2/DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/1762652579.546083", + "retrieved_timestamp": "1762652579.5460842", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", + "developer": "DebateLabKIT", + "inference_platform": "unknown", + "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.551921124837653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48238301936695316 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3472406914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json b/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json new file mode 100644 index 000000000..e10f93127 --- /dev/null +++ b/data/hfopenllm_v2/Deci/DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B-instruct/1762652579.546672", + "retrieved_timestamp": "1762652579.546672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deci/DeciLM-7B-instruct", + "developer": "Deci", + "inference_platform": "unknown", + "id": "Deci/DeciLM-7B-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4880239985460799 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4589748654047652 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38841666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26080452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "DeciLMForCausalLM", + "params_billions": 7.044 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json b/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json new file mode 100644 index 000000000..72de39763 --- /dev/null +++ b/data/hfopenllm_v2/Deci/DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B/1762652579.5463831", + "retrieved_timestamp": "1762652579.5463839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Deci/DeciLM-7B", + "developer": "Deci", + "inference_platform": "unknown", + "id": "Deci/DeciLM-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28129474239462404 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44228566674266495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43585416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26919880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "DeciLMForCausalLM", + "params_billions": 7.044 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json b/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json new file mode 100644 index 000000000..f6f1d86a6 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/1762652579.548984", + "retrieved_timestamp": "1762652579.548985", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7892746800711002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5080411642065981 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3877160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json new file mode 100644 index 000000000..ee036c49c --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/1762652579.5500422", + "retrieved_timestamp": "1762652579.5500429", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889499860370484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5125175335277464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41213541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38954454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json new file mode 100644 index 000000000..3f2348123 --- /dev/null +++ b/data/hfopenllm_v2/DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/1762652579.550273", + "retrieved_timestamp": "1762652579.5502741", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", + "developer": "DeepAutoAI", + "inference_platform": "unknown", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889499860370484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5125175335277464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41213541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38954454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json new file mode 100644 index 000000000..3fe0eddfe --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B/1762652579.550504", + "retrieved_timestamp": "1762652579.550505", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Lexora-Lite-3B", + "developer": "DeepMount00", + "inference_platform": "unknown", + "id": "DeepMount00/Lexora-Lite-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5775996577968678 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4873392373334518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39660416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3602061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json new file mode 100644 index 000000000..aa9a7d93c --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B_v2/1762652579.550789", + "retrieved_timestamp": "1762652579.550789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Lexora-Lite-3B_v2", + "developer": "DeepMount00", + "inference_platform": "unknown", + "id": "DeepMount00/Lexora-Lite-3B_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49431840848947456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48117654754683153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2280966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35438829787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json b/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json new file mode 100644 index 000000000..290a67676 --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Medium-7B/1762652579.551008", + "retrieved_timestamp": "1762652579.551009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Lexora-Medium-7B", + "developer": "DeepMount00", + "inference_platform": "unknown", + "id": "DeepMount00/Lexora-Medium-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103379034295669 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5144844494250328 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22205438066465258 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44394791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43251329787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json b/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json new file mode 100644 index 000000000..c97063c1b --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/1762652579.55323", + "retrieved_timestamp": "1762652579.553231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", + "developer": "DeepMount00", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15302508455342934 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2998444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json b/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json new file mode 100644 index 000000000..94e14b85d --- /dev/null +++ b/data/hfopenllm_v2/DeepMount00/mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_mergekit-ties-okvgjfz/1762652579.5535848", + "retrieved_timestamp": "1762652579.553586", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/mergekit-ties-okvgjfz", + "developer": "DeepMount00", + "inference_platform": "unknown", + "id": "DeepMount00/mergekit-ties-okvgjfz" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15302508455342934 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2998444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json b/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json new file mode 100644 index 000000000..fd3a3c053 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Baldur-8B/1762652579.5538838", + "retrieved_timestamp": "1762652579.553885", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Baldur-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Baldur-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47818233398493776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5305842954529679 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43715624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3654421542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json b/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json new file mode 100644 index 000000000..a312468cb --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B-V1.1/1762652579.5543838", + "retrieved_timestamp": "1762652579.554385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Control-8B-V1.1", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Control-8B-V1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5696562897556262 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49928406748541837 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42372916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37450132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json b/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json new file mode 100644 index 000000000..007a2da14 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B/1762652579.554166", + "retrieved_timestamp": "1762652579.554166", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Control-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Control-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489733906035985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5041458754993735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43554166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731715425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json b/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json new file mode 100644 index 000000000..49c77e5d3 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Darkens-8B/1762652579.5545971", + "retrieved_timestamp": "1762652579.5545971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Darkens-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Darkens-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25476624245889795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250590567372793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4105520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3735871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json b/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json new file mode 100644 index 000000000..a137c4b60 --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Henbane-7b-attempt2/1762652579.55481", + "retrieved_timestamp": "1762652579.55481", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Henbane-7b-attempt2", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Henbane-7b-attempt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157335868828043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5061177974093075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39734375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json b/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json new file mode 100644 index 000000000..9d3f2912a --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Odin-9B/1762652579.555037", + "retrieved_timestamp": "1762652579.555038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Odin-9B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Odin-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3691970637907419 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5440253444823155 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46478125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4046708776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json b/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json new file mode 100644 index 000000000..007c691da --- /dev/null +++ b/data/hfopenllm_v2/Delta-Vector/Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Delta-Vector_Tor-8B/1762652579.555239", + "retrieved_timestamp": "1762652579.55524", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Delta-Vector/Tor-8B", + "developer": "Delta-Vector", + "inference_platform": "unknown", + "id": "Delta-Vector/Tor-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23815476269631244 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5209108776928992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40921874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37300531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json new file mode 100644 index 000000000..0a947bb76 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/1762652579.556192", + "retrieved_timestamp": "1762652579.556193", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3955006050612375 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5314954163679548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3840729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37275598404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json new file mode 100644 index 000000000..6815336b1 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore/1762652579.555949", + "retrieved_timestamp": "1762652579.5559502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/L3-8B-R1-WolfCore", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/L3-8B-R1-WolfCore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775404814780339 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531794652653343 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json b/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json new file mode 100644 index 000000000..30ac21a40 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-WolfCore/1762652579.556399", + "retrieved_timestamp": "1762652579.5564", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/L3-8B-WolfCore", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/L3-8B-WolfCore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4021950646506824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181980783946081 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39728125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3705119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json new file mode 100644 index 000000000..cee948dd5 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame-test/1762652579.556618", + "retrieved_timestamp": "1762652579.556619", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-FoxFrame-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-FoxFrame-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42220308780701876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5456376527271466 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3503158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json new file mode 100644 index 000000000..0d5b8cc29 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame2-test/1762652579.556837", + "retrieved_timestamp": "1762652579.5568378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-FoxFrame2-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-FoxFrame2-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43189514931492884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5484795753806021 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1404833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4251875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3568816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json new file mode 100644 index 000000000..a4af87556 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame3-test/1762652579.557049", + "retrieved_timestamp": "1762652579.5570502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-FoxFrame3-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-FoxFrame3-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43231957871780213 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394764281718397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45976041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35289228723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json new file mode 100644 index 000000000..049afa2cc --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Kakigori/1762652579.5572648", + "retrieved_timestamp": "1762652579.557266", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Kakigori", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Kakigori" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359329911302012 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415529337961275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40521875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3581283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json new file mode 100644 index 000000000..b8b4afe65 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/1762652579.5578592", + "retrieved_timestamp": "1762652579.5578601", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4299469851106176 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4982672766561394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32762632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json new file mode 100644 index 000000000..bea82e52b --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/1762652579.558079", + "retrieved_timestamp": "1762652579.558079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4127858526487498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5468080647121653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4038541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3603723404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json new file mode 100644 index 000000000..4249b365d --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/1762652579.5582879", + "retrieved_timestamp": "1762652579.5582888", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3981480250180632 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5534370722864824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648603723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json new file mode 100644 index 000000000..88de0d374 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557674", + "retrieved_timestamp": "1762652579.5576751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-LilithFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-LilithFrame" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43604192431636946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4956125598349656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32372007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json new file mode 100644 index 000000000..3d42198fe --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557468", + "retrieved_timestamp": "1762652579.557469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-LilithFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-LilithFrame" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4509545782966972 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4944264226434414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3895625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3256316489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json new file mode 100644 index 000000000..f2718c16e --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-GreenSnake/1762652579.5585039", + "retrieved_timestamp": "1762652579.558505", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47800724300411795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480509710089697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4305833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3651097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json new file mode 100644 index 000000000..2dfeac638 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Nocturne/1762652579.558723", + "retrieved_timestamp": "1762652579.5587242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-Nocturne", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3956502081144696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5703329773483826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45690625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36336436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json new file mode 100644 index 000000000..5d3debf64 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/1762652579.5591779", + "retrieved_timestamp": "1762652579.559179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2842413684579139 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5322525988273211 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45737500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3423371010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json new file mode 100644 index 000000000..cd4f535d7 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/1762652579.559391", + "retrieved_timestamp": "1762652579.559392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4101628124487471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5437817873983797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44379166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.339594414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json new file mode 100644 index 000000000..3b8dc74a5 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/1762652579.559606", + "retrieved_timestamp": "1762652579.559606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4320702402957486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5462502212045214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4449375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3519780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json new file mode 100644 index 000000000..2aa943f1d --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi/1762652579.558937", + "retrieved_timestamp": "1762652579.558938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-Orochi", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-Orochi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620451513096362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.54977394640115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13595166163141995 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45458333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34466422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json new file mode 100644 index 000000000..874d0e54c --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/1762652579.5600362", + "retrieved_timestamp": "1762652579.5600362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39090391272933595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48656395204478037 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3789583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31141954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json new file mode 100644 index 000000000..80b632ad0 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/1762652579.560246", + "retrieved_timestamp": "1762652579.560246", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31239333856389934 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5126398500939828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33136635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json new file mode 100644 index 000000000..bc70dc1d4 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1762652579.56046", + "retrieved_timestamp": "1762652579.560461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4302218114602588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4811798810475259 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31981382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json new file mode 100644 index 000000000..9c4e26119 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/1762652579.560668", + "retrieved_timestamp": "1762652579.560668", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42405151664250856 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5184748714407336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40019791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341921542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json new file mode 100644 index 000000000..4fc07e347 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/1762652579.55982", + "retrieved_timestamp": "1762652579.5598211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44376033369238066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5604605871844869 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json new file mode 100644 index 000000000..fb25db547 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Unleashed-Twilight/1762652579.560919", + "retrieved_timestamp": "1762652579.56092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-Unleashed-Twilight", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-Unleashed-Twilight" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3505121965274361 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5520627163174447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4383958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3677692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json b/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json new file mode 100644 index 000000000..76cd34d94 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-WolFrame/1762652579.5611808", + "retrieved_timestamp": "1762652579.561182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MN-12B-WolFrame", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MN-12B-WolFrame" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4397387819873491 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.511681287565329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40146875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33934507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json new file mode 100644 index 000000000..c562fc9b2 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-test/1762652579.5616372", + "retrieved_timestamp": "1762652579.5616379", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MiniusLight-24B-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MiniusLight-24B-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03936776641533354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6333927323374534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40925000000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5182014627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json new file mode 100644 index 000000000..7c4010756 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1b-test/1762652579.561931", + "retrieved_timestamp": "1762652579.561932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MiniusLight-24B-v1b-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MiniusLight-24B-v1b-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37911408396388246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6617145681113757 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2394259818731118 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4557291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364860372340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json new file mode 100644 index 000000000..2fc689334 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1c-test/1762652579.562173", + "retrieved_timestamp": "1762652579.5621738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MiniusLight-24B-v1c-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MiniusLight-24B-v1c-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37858881102142317 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6752681657268389 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46341666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5487034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json new file mode 100644 index 000000000..9e2cb1429 --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1d-test/1762652579.5624058", + "retrieved_timestamp": "1762652579.5624058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MiniusLight-24B-v1d-test", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MiniusLight-24B-v1d-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40324339419407174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6712025325276962 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46208333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5488696808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json new file mode 100644 index 000000000..3780b3adc --- /dev/null +++ b/data/hfopenllm_v2/DoppelReflEx/MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B/1762652579.561418", + "retrieved_timestamp": "1762652579.561419", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DoppelReflEx/MiniusLight-24B", + "developer": "DoppelReflEx", + "inference_platform": "unknown", + "id": "DoppelReflEx/MiniusLight-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25766410900854175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6256461050033514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43191666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5091422872340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json b/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json new file mode 100644 index 000000000..945bbd224 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Again-8B-Model_Stock/1762652579.562616", + "retrieved_timestamp": "1762652579.562617", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Again-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Again-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6724213974476612 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309801059970912 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39867708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.351811835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json b/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json new file mode 100644 index 000000000..9f47b0d52 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Alita99-8B-LINEAR/1762652579.562879", + "retrieved_timestamp": "1762652579.56288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Alita99-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Alita99-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7190077882241341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5441767095577089 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42664583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38090093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json b/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json new file mode 100644 index 000000000..0a105c768 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_AnotherTest/1762652579.563089", + "retrieved_timestamp": "1762652579.563089", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/AnotherTest", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/AnotherTest" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47006387496287627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46834113564549334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42128125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2874833776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json b/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json new file mode 100644 index 000000000..fce22b642 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire-8B-model_stock/1762652579.5633001", + "retrieved_timestamp": "1762652579.563301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire-8B-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7140620221013578 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278251846388996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14954682779456194 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42124999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37632978723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json b/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json new file mode 100644 index 000000000..fb1ba8365 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_1.3-8B_model-stock/1762652579.563606", + "retrieved_timestamp": "1762652579.563607", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_1.3-8B_model-stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_1.3-8B_model-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7061685217445268 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301644606574212 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37159242021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json new file mode 100644 index 000000000..d0a25b481 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2-8B-Model_Stock/1762652579.56384", + "retrieved_timestamp": "1762652579.563841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7371430027881576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5329650089428358 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38937499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3696808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json new file mode 100644 index 000000000..d34c6c477 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2.1-8B-Model_Stock/1762652579.564126", + "retrieved_timestamp": "1762652579.564127", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V2.1-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7237540836092679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5236395810818485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41359375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800698138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json new file mode 100644 index 000000000..994d8bc3d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/1762652579.5643399", + "retrieved_timestamp": "1762652579.564341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7381170848903134 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265819478728287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39749999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3726728723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json new file mode 100644 index 000000000..c85eb1266 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/1762652579.564561", + "retrieved_timestamp": "1762652579.5645618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7381170848903134 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265819478728287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39749999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3726728723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json b/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json new file mode 100644 index 000000000..d37945534 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V3-8B-Model_Stock/1762652579.5648441", + "retrieved_timestamp": "1762652579.564845", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V3-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5118795905973927 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5267958758971987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40149999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36419547872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json new file mode 100644 index 000000000..7252a1a00 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4-8B-Model_Stock/1762652579.565063", + "retrieved_timestamp": "1762652579.565064", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V4-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V4-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.769416259967996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5314037161536506 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3867395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json b/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json new file mode 100644 index 000000000..206d588e5 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/1762652579.565274", + "retrieved_timestamp": "1762652579.565275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7365933500888753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5268232518944024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18126888217522658 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681848404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json b/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json new file mode 100644 index 000000000..e50673e70 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/1762652579.5654871", + "retrieved_timestamp": "1762652579.565488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7174341857382855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.546535755155883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41994791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3843916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json new file mode 100644 index 000000000..cc9f00545 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LINEAR/1762652579.565701", + "retrieved_timestamp": "1762652579.565702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aurora_faustus-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aurora_faustus-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7281003293483512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5515538279425277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json new file mode 100644 index 000000000..55aea1d9b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED/1762652579.565921", + "retrieved_timestamp": "1762652579.565921", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aurora_faustus-8B-LORABLATED", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aurora_faustus-8B-LORABLATED" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527050448365891 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539159616655651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42385416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36727061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json new file mode 100644 index 000000000..0285f5489 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/1762652579.566129", + "retrieved_timestamp": "1762652579.56613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7377923908562614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5387670721191214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332327 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4225208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36943151595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json b/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json new file mode 100644 index 000000000..a6e629a23 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Autumn_Dawn-8B-LINEAR/1762652579.566346", + "retrieved_timestamp": "1762652579.5663471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Autumn_Dawn-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Autumn_Dawn-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7292993701157373 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5459436958014627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39677526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json new file mode 100644 index 000000000..dfad81f64 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-LINEAR/1762652579.56655", + "retrieved_timestamp": "1762652579.566551", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BaeZel-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BaeZel-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7377923908562614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5463800554321383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18126888217522658 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4227083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3861369680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json new file mode 100644 index 000000000..a8f953dd2 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-Model_Stock/1762652579.566763", + "retrieved_timestamp": "1762652579.566764", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BaeZel-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BaeZel-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7713145564878965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5407680550216925 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41991666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38804853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json new file mode 100644 index 000000000..244bf1daf --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2-8B-Model_Stock/1762652579.566977", + "retrieved_timestamp": "1762652579.566978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BaeZel_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BaeZel_V2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676675665013276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5373871612758611 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946974734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json new file mode 100644 index 000000000..8afe61387 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/1762652579.567195", + "retrieved_timestamp": "1762652579.567196", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676675665013276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5373871612758611 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946974734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json b/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json new file mode 100644 index 000000000..6a6fea4da --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V3-8B-Model_Stock/1762652579.5674188", + "retrieved_timestamp": "1762652579.56742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BaeZel_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BaeZel_V3-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7831797408653485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539231076759135 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41743749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3887965425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json b/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json new file mode 100644 index 000000000..5cde93855 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Blunt_Edge-8B-SLERP/1762652579.567633", + "retrieved_timestamp": "1762652579.5676339", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Blunt_Edge-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Blunt_Edge-8B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7496575752337131 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5389470863694941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37666223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json b/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json new file mode 100644 index 000000000..1ae3a6f22 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_BulkUp/1762652579.567868", + "retrieved_timestamp": "1762652579.567869", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/BulkUp", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/BulkUp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.177804891022487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28698602947692575 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3446666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11095412234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json b/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json new file mode 100644 index 000000000..09b67b6c6 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Cadence-8B-LINEAR/1762652579.568077", + "retrieved_timestamp": "1762652579.568078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Cadence-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Cadence-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7682172192006099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5433358555450108 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16767371601208458 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41734374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3803191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json b/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json new file mode 100644 index 000000000..ebb4c22ff --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Caelid-8B-Model_Stock/1762652579.5682912", + "retrieved_timestamp": "1762652579.5682921", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Caelid-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Caelid-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7247281657114235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5459605196913864 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1510574018126888 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4001041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3816489361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json b/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json new file mode 100644 index 000000000..6e6613787 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Casuar-9B-Model_Stock/1762652579.5685189", + "retrieved_timestamp": "1762652579.5685189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Casuar-9B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Casuar-9B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7764852812759035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6106681877306871 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41654166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156416223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json b/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json new file mode 100644 index 000000000..a17811bfa --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Condensed_Milk-8B-Model_Stock/1762652579.568758", + "retrieved_timestamp": "1762652579.568759", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Condensed_Milk-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Condensed_Milk-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7536292592543341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5434864122121906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17447129909365558 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41601041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38763297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json b/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json new file mode 100644 index 000000000..3652f2100 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_CoolerCoder-8B-LINEAR/1762652579.568993", + "retrieved_timestamp": "1762652579.568993", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/CoolerCoder-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/CoolerCoder-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4519286603988528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4761504835496542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3963541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31590757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json b/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json new file mode 100644 index 000000000..215188c0e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Damasteel-8B-LINEAR/1762652579.569221", + "retrieved_timestamp": "1762652579.569222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Damasteel-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Damasteel-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7384417789243651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5388142176959776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42124999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779089095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json b/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json new file mode 100644 index 000000000..6450f21ef --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Dearly_Beloved-8B-TIES/1762652579.569437", + "retrieved_timestamp": "1762652579.569438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Dearly_Beloved-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Dearly_Beloved-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8266687943545348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4049833102731906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2826628989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json b/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json new file mode 100644 index 000000000..d6f6a4b47 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Decayed-8B-LINEAR/1762652579.569654", + "retrieved_timestamp": "1762652579.569655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Decayed-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Decayed-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676176988169169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417014088773181 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1714501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37632978723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json b/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json new file mode 100644 index 000000000..cc763e3c0 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative-8B-Model_Stock/1762652579.569859", + "retrieved_timestamp": "1762652579.56986", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Derivative-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Derivative-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7667433520835827 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395493987763994 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17900302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42004166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3810671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json new file mode 100644 index 000000000..966a070c0 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2-8B-Model_Stock/1762652579.5701172", + "retrieved_timestamp": "1762652579.570118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Derivative_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Derivative_V2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7536791269387447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392643954415269 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41229166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38563829787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json b/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json new file mode 100644 index 000000000..8dc10f445 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/1762652579.570343", + "retrieved_timestamp": "1762652579.570344", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7719639445560003 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5365351570462934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18806646525679757 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json b/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json new file mode 100644 index 000000000..af833e351 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V3-8B-Model_Stock/1762652579.570688", + "retrieved_timestamp": "1762652579.570689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Derivative_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Derivative_V3-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6963767248677952 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.524319745545524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35023271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json b/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json new file mode 100644 index 000000000..81bbba4e2 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/1762652579.570945", + "retrieved_timestamp": "1762652579.570946", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7131378076836128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5456414280881592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3813996010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json b/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json new file mode 100644 index 000000000..124f61de3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Emu_Eggs-9B-Model_Stock/1762652579.571181", + "retrieved_timestamp": "1762652579.571182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Emu_Eggs-9B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Emu_Eggs-9B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7606982805622415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6051657213517168 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4227061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json b/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json new file mode 100644 index 000000000..6f1ba7362 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Eunoia_Vespera-8B-LINEAR/1762652579.571407", + "retrieved_timestamp": "1762652579.571407", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Eunoia_Vespera-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7235291249440374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399310621081937 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38389295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json b/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json new file mode 100644 index 000000000..d5130f4c9 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Fu_sion_HA-8B-SLERP/1762652579.57162", + "retrieved_timestamp": "1762652579.5716212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Fu_sion_HA-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Fu_sion_HA-8B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7609232392274721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5372804197028272 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17522658610271905 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41601041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json b/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json new file mode 100644 index 000000000..9e69daa4e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_HOT_STINKING_GARBAGE/1762652579.571834", + "retrieved_timestamp": "1762652579.5718348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/HOT_STINKING_GARBAGE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/HOT_STINKING_GARBAGE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5754265349273262 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4884000866161456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42500000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30169547872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json b/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json new file mode 100644 index 000000000..5502a033d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_H_the_eighth-8B-LINEAR/1762652579.572039", + "retrieved_timestamp": "1762652579.5720398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/H_the_eighth-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/H_the_eighth-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7469347996648892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383752114303682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3823969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json b/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json new file mode 100644 index 000000000..3c8b5302c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Happy_New_Year-8B-Model_Stock/1762652579.572258", + "retrieved_timestamp": "1762652579.5722592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Happy_New_Year-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Happy_New_Year-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7615726272955757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367913866457493 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3878823138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json new file mode 100644 index 000000000..37464c94b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-8B-Model_Stock/1762652579.572714", + "retrieved_timestamp": "1762652579.5727181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Heart_Stolen-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Heart_Stolen-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7244533393617822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395443745186658 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41622916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37940492021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json new file mode 100644 index 000000000..176bb423b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/1762652579.573096", + "retrieved_timestamp": "1762652579.573097", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7183584001560305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526338467747489 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40549999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37724401595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json b/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json new file mode 100644 index 000000000..d9c59156f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Here_We_Go_Again-8B-SLERP/1762652579.573366", + "retrieved_timestamp": "1762652579.573367", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Here_We_Go_Again-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Here_We_Go_Again-8B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7442120240960651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5460182474181831 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json b/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json new file mode 100644 index 000000000..e2c569e5d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Howdy-8B-LINEAR/1762652579.573699", + "retrieved_timestamp": "1762652579.5737002", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Howdy-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Howdy-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7377923908562614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383981582614435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41213541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806515957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json b/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json new file mode 100644 index 000000000..9e62e6efd --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Incidental-8B-Model_Stock/1762652579.573979", + "retrieved_timestamp": "1762652579.5739799", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Incidental-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Incidental-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.748183708116686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5452070612873019 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42401041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json b/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json new file mode 100644 index 000000000..cf7e83e00 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Irina-8B-model_stock/1762652579.574285", + "retrieved_timestamp": "1762652579.574286", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Irina-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Irina-8B-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6799403360860294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5236638956084764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40029166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35738031914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json b/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json new file mode 100644 index 000000000..ed0a794ae --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Kindling-8B-Model_Stock/1762652579.57468", + "retrieved_timestamp": "1762652579.574682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Kindling-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Kindling-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7308231049171753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5492054832931256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17522658610271905 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4068333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3829787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json b/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json new file mode 100644 index 000000000..fffb2b7d1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_L3.1-BaeZel-8B-Della/1762652579.575009", + "retrieved_timestamp": "1762652579.57501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/L3.1-BaeZel-8B-Della", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/L3.1-BaeZel-8B-Della" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5180243974875552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5448449542185521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17447129909365558 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4199791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3902094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json b/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json new file mode 100644 index 000000000..0255c1efb --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Laughing_Stock-8B-Model_Stock/1762652579.5752351", + "retrieved_timestamp": "1762652579.575236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Laughing_Stock-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Laughing_Stock-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7189579205397235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5449429262155 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json b/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json new file mode 100644 index 000000000..72514129b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Lava_Lamp-8B-SLERP/1762652579.575455", + "retrieved_timestamp": "1762652579.575455", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Lava_Lamp-8B-SLERP", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Lava_Lamp-8B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7381170848903134 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367586873360172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json b/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json new file mode 100644 index 000000000..4c72d7b2a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_LemonP-8B-Model_Stock/1762652579.575685", + "retrieved_timestamp": "1762652579.575686", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/LemonP-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/LemonP-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676176988169169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5439348074265458 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40810416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40043218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json b/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json new file mode 100644 index 000000000..0105a8bb2 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/1762652579.575901", + "retrieved_timestamp": "1762652579.575901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.760323718843779 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5379527944750039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800698138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json b/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json new file mode 100644 index 000000000..202fb4f1e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Matryoshka-8B-LINEAR/1762652579.576119", + "retrieved_timestamp": "1762652579.5761201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Matryoshka-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Matryoshka-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7262519005128614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5444280006376178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17522658610271905 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42524999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3865525265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json b/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json new file mode 100644 index 000000000..7605ab331 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/1762652579.576331", + "retrieved_timestamp": "1762652579.576332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7296240641497892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390507664719518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4198854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json b/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json new file mode 100644 index 000000000..6eee39abf --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy-8B-Model_Stock/1762652579.5765939", + "retrieved_timestamp": "1762652579.5765948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Minthy-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Minthy-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.765769269981427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5352951319641014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40940624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3992686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json b/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json new file mode 100644 index 000000000..d16b0f35c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_ALT-8B-Model_Stock/1762652579.57681", + "retrieved_timestamp": "1762652579.576811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Minthy_ALT-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Minthy_ALT-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6991992358054406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374800202589046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4225208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3673537234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json b/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json new file mode 100644 index 000000000..bd82fdf46 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_V2-8B-Model_Stock/1762652579.5770218", + "retrieved_timestamp": "1762652579.577023", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Minthy_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Minthy_V2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7125881549843305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5491095928821667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4198854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37367021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json b/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json new file mode 100644 index 000000000..8b32069de --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Minus_Penus-8B-Model_Stock/1762652579.577236", + "retrieved_timestamp": "1762652579.577237", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Minus_Penus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Minus_Penus-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7311477989512272 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343781571200968 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40190624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3751662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json b/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json new file mode 100644 index 000000000..116766542 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/1762652579.577775", + "retrieved_timestamp": "1762652579.5777762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7721889032212308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5350849793007441 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41473958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3839760638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json b/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json new file mode 100644 index 000000000..25711f8bd --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nother_One-8B-Model_Stock/1762652579.578036", + "retrieved_timestamp": "1762652579.578037", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Nother_One-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Nother_One-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6863101016414226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204527600425481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38702083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35945811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json b/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json new file mode 100644 index 000000000..dd11d4926 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Noxis-8B-LINEAR/1762652579.578263", + "retrieved_timestamp": "1762652579.578263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Noxis-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Noxis-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6913057354486096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420956502068554 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3660239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json b/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json new file mode 100644 index 000000000..714856aa4 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nullsworn-12B-LINEAR/1762652579.578492", + "retrieved_timestamp": "1762652579.5784929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Nullsworn-12B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Nullsworn-12B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44356086295473784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5483045026677609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43495833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3645279255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json b/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json new file mode 100644 index 000000000..ed1ad924a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Nwah-8B-Model_Stock/1762652579.578718", + "retrieved_timestamp": "1762652579.578719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Nwah-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Nwah-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7715893828375378 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5384269019541996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4039479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3807347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json b/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json new file mode 100644 index 000000000..4a4a7f162 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_ONeil-model_stock-8B/1762652579.578939", + "retrieved_timestamp": "1762652579.57894", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/ONeil-model_stock-8B", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/ONeil-model_stock-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6785662043378236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5548337982400763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41734374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35987367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json b/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json new file mode 100644 index 000000000..68abbce7f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Oh_Boy-8B-LINEAR/1762652579.5791628", + "retrieved_timestamp": "1762652579.5791638", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Oh_Boy-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Oh_Boy-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7503069633018169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375114406292553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4107708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3848902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json b/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json new file mode 100644 index 000000000..07b0a699e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_OrangeJ-8B-Model_Stock/1762652579.57939", + "retrieved_timestamp": "1762652579.579391", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/OrangeJ-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/OrangeJ-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7841039552830933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413478053905038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3968583776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json new file mode 100644 index 000000000..4ce48a078 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/1762652579.579823", + "retrieved_timestamp": "1762652579.5798242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7156356245872064 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435183631990302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37391954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json new file mode 100644 index 000000000..63d993044 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR/1762652579.5796108", + "retrieved_timestamp": "1762652579.579612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Promissum_Mane-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Promissum_Mane-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7150361042035134 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5457684398146738 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42004166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38505651595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json b/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json new file mode 100644 index 000000000..9654d3bd7 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash-8B-Model_Stock/1762652579.5800488", + "retrieved_timestamp": "1762652579.58005", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/RPMash-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/RPMash-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4563502617499346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169088291675549 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3603723404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json b/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json new file mode 100644 index 000000000..d9c6c8429 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash_V3-8B-Model_Stock/1762652579.580262", + "retrieved_timestamp": "1762652579.580263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/RPMash_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/RPMash_V3-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.70491961329273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5217453397523113 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37775000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36136968085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json b/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json new file mode 100644 index 000000000..867a4619e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Gold-8B-LINEAR/1762652579.58047", + "retrieved_timestamp": "1762652579.580471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Rusted_Gold-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Rusted_Gold-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7296240641497892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386646439313688 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37799202127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json new file mode 100644 index 000000000..0a20e5ce8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-LINEAR/1762652579.580692", + "retrieved_timestamp": "1762652579.580693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Rusted_Platinum-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Rusted_Platinum-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7179838384375679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5427868416987739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39666666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37300531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json new file mode 100644 index 000000000..119929b35 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-Model_Stock/1762652579.580914", + "retrieved_timestamp": "1762652579.580915", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Rusted_Platinum-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44078821970150317 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5242840148078765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37406249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3546376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json b/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json new file mode 100644 index 000000000..a0b5b0911 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sellen-8B-model_stock/1762652579.5811431", + "retrieved_timestamp": "1762652579.581144", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Sellen-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Sellen-8B-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7112893788481229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5231680557624704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3960416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35696476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json b/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json new file mode 100644 index 000000000..e20e1de5f --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Something-8B-Model_Stock/1762652579.5815392", + "retrieved_timestamp": "1762652579.58154", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Something-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Something-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5043107842746135 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395029370473196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41873958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3885472074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json b/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json new file mode 100644 index 000000000..7bb2dd306 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Spring_Dusk-8B-SCE/1762652579.581773", + "retrieved_timestamp": "1762652579.581774", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Spring_Dusk-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Spring_Dusk-8B-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6514636719459922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5635271357931001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45997916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3435837765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json b/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json new file mode 100644 index 000000000..6fc6bc4a1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dawn-8B-SCE/1762652579.581994", + "retrieved_timestamp": "1762652579.581994", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Summer_Dawn-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Summer_Dawn-8B-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6642032030567783 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539111375413361 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41204166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37533244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json b/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json new file mode 100644 index 000000000..3c8f275e5 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dusk-8B-TIES/1762652579.582258", + "retrieved_timestamp": "1762652579.582258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Summer_Dusk-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Summer_Dusk-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4922206412319312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359662578395569 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3855551861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json new file mode 100644 index 000000000..7f34c06e7 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-SCE/1762652579.582465", + "retrieved_timestamp": "1762652579.5824661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Summer_Rain-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Summer_Rain-8B-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5459259210007226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5845948417986419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4477291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json new file mode 100644 index 000000000..f9f31e11a --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-TIES/1762652579.582679", + "retrieved_timestamp": "1762652579.582679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Summer_Rain-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Summer_Rain-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5444021861992845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5845948417986419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4477291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json b/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json new file mode 100644 index 000000000..52aaf65a8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sun-8B-Model_Stock/1762652579.58288", + "retrieved_timestamp": "1762652579.58288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Sun-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Sun-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7758358932077998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5263511014407583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38347739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json b/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json new file mode 100644 index 000000000..41608e6a0 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/1762652579.5830941", + "retrieved_timestamp": "1762652579.583095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7417142071924716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406287643522295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4106770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38480718085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json b/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json new file mode 100644 index 000000000..7b5aa339c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST02-Ignore/1762652579.583313", + "retrieved_timestamp": "1762652579.583314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/TEST02-Ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/TEST02-Ignore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6118964347930158 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5601644306147606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41985416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3468251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json b/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json new file mode 100644 index 000000000..47b5c3573 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST03-ignore/1762652579.583565", + "retrieved_timestamp": "1762652579.5835662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/TEST03-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/TEST03-ignore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6967014189018471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383414134372179 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37890625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json b/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json new file mode 100644 index 000000000..b603b1f8c --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST06-ignore/1762652579.583824", + "retrieved_timestamp": "1762652579.5838249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/TEST06-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/TEST06-ignore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7322969720342026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5509060880148441 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4224895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json b/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json new file mode 100644 index 000000000..4242a37fa --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST07-ignore/1762652579.5841951", + "retrieved_timestamp": "1762652579.584198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/TEST07-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/TEST07-ignore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7399655137258031 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5561275711510345 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40937500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3879654255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json b/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json new file mode 100644 index 000000000..abeccc73b --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_TEST08-ignore/1762652579.5845299", + "retrieved_timestamp": "1762652579.5845308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/TEST08-ignore", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/TEST08-ignore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7466599733152479 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5453519655444978 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40810416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3853058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json b/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json new file mode 100644 index 000000000..d9cf10826 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Trinas_Nectar-8B-model_stock/1762652579.58478", + "retrieved_timestamp": "1762652579.5847821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Trinas_Nectar-8B-model_stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Trinas_Nectar-8B-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7259272064788096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5256123853406084 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json b/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json new file mode 100644 index 000000000..416b273e8 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/1762652579.585024", + "retrieved_timestamp": "1762652579.585025", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47176270074513404 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5475027267486955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4449375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.378656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json b/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json new file mode 100644 index 000000000..7e9d0819e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_VENN_1.2-8B-Model_Stock/1762652579.5852559", + "retrieved_timestamp": "1762652579.585257", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/VENN_1.2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/VENN_1.2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7226049105262924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5458812486333333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42001041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3720910904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json b/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json new file mode 100644 index 000000000..dd970d1bc --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_WIP-Acacia-8B-Model_Stock/1762652579.5854762", + "retrieved_timestamp": "1762652579.585477", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/WIP-Acacia-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/WIP-Acacia-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6246359659038019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194665568943516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4225833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37367021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json b/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json new file mode 100644 index 000000000..a27308345 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_WIP_Damascus-8B-TIES/1762652579.5856981", + "retrieved_timestamp": "1762652579.5856981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/WIP_Damascus-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/WIP_Damascus-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4776326812856554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410672913070808 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41185416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json b/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json new file mode 100644 index 000000000..e36878b91 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Wannabe-8B-Model_Stock/1762652579.585919", + "retrieved_timestamp": "1762652579.58592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Wannabe-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Wannabe-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7204816553411615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5389637944785705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json b/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json new file mode 100644 index 000000000..d3fcab786 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_What_A_Thrill-8B-Model_Stock/1762652579.5861409", + "retrieved_timestamp": "1762652579.586142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/What_A_Thrill-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/What_A_Thrill-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7064433480941679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531144904394377 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40804166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json b/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json new file mode 100644 index 000000000..eb6e9782d --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter-8B-SCE/1762652579.586359", + "retrieved_timestamp": "1762652579.58636", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Winter-8B-SCE", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Winter-8B-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7536292592543341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261733490323383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38389295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json b/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json new file mode 100644 index 000000000..a518a2260 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dawn-8B-TIES/1762652579.586569", + "retrieved_timestamp": "1762652579.58657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Winter_Dawn-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Winter_Dawn-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5496482665992899 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309416142154736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42785416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json b/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json new file mode 100644 index 000000000..8a68e76c3 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dusk-8B-TIES/1762652579.586781", + "retrieved_timestamp": "1762652579.586782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Winter_Dusk-8B-TIES", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Winter_Dusk-8B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7152610628687439 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951882158967103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3688229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3478224734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json b/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json new file mode 100644 index 000000000..700ce6efc --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Night-8B-Model_Stock/1762652579.587023", + "retrieved_timestamp": "1762652579.587024", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Winter_Night-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Winter_Night-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7040452665593957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5184968441488284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3666057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json b/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json new file mode 100644 index 000000000..5d6cc5ca4 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Yafune-8B-Model_Stock/1762652579.587391", + "retrieved_timestamp": "1762652579.587392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Yafune-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Yafune-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7533045652202822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5466719512941253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38505651595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json b/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json new file mode 100644 index 000000000..c6c7ea126 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Yearn_V3-8B-Model_Stock/1762652579.587668", + "retrieved_timestamp": "1762652579.587669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Yearn_V3-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Yearn_V3-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7289746760816855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5322019394938072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3908958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3801529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json b/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json new file mode 100644 index 000000000..685131ff1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/1762652579.587883", + "retrieved_timestamp": "1762652579.587884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5511221337163171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5231075970343642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903323262839879 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41492708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3890458776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json b/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json new file mode 100644 index 000000000..1f5bfd3df --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus-8B-Model_Stock/1762652579.5881522", + "retrieved_timestamp": "1762652579.5881522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Zelus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Zelus-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.778833495126265 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5307011398651839 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42140625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38414228723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json b/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json new file mode 100644 index 000000000..602bc81b0 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus_V2-8B-Model_Stock/1762652579.588366", + "retrieved_timestamp": "1762652579.5883808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Zelus_V2-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/Zelus_V2-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7898243327703826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5344816839912676 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3960729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38331117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json b/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json new file mode 100644 index 000000000..32b08e124 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_hakuchido-8B-MODEL_STOCK/1762652579.589018", + "retrieved_timestamp": "1762652579.589018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/hakuchido-8B-MODEL_STOCK", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/hakuchido-8B-MODEL_STOCK" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7375175645066203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5398373390214104 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3781582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json b/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json new file mode 100644 index 000000000..ccef33494 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_ichor-8B-Model_Stock/1762652579.589237", + "retrieved_timestamp": "1762652579.589238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/ichor-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/ichor-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386319410275846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084222037759372 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42121875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31507646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json b/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json new file mode 100644 index 000000000..630eef705 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_ichor_1.1-8B-Model_Stock/1762652579.589439", + "retrieved_timestamp": "1762652579.589439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/ichor_1.1-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/ichor_1.1-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8096328851890761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.528067770617839 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3855551861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json b/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json new file mode 100644 index 000000000..54ea922ef --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus-8B-Model_Stock/1762652579.589726", + "retrieved_timestamp": "1762652579.589729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/inexpertus-8B-Model_Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/inexpertus-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7795327508787795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5280190470468065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41182291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790724734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json new file mode 100644 index 000000000..e22bcbbe1 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.1-8B-LINEAR/1762652579.59006", + "retrieved_timestamp": "1762652579.590061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/inexpertus_1.1-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/inexpertus_1.1-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527050448365891 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5524638802167572 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41734374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38272938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json b/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json new file mode 100644 index 000000000..17134c2eb --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.2-8B-LINEAR/1762652579.590318", + "retrieved_timestamp": "1762652579.5903192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/inexpertus_1.2-8B-LINEAR", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/inexpertus_1.2-8B-LINEAR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7347947889377962 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5523440600721518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41334374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37882313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json b/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json new file mode 100644 index 000000000..23b82570e --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_mergekit-nuslerp-nqzkedi/1762652579.590566", + "retrieved_timestamp": "1762652579.590566", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/mergekit-nuslerp-nqzkedi", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/mergekit-nuslerp-nqzkedi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7764852812759035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361918366546249 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18806646525679757 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4224583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3918716755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json b/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json new file mode 100644 index 000000000..ad9645730 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_remember_to_breathe-8b-Model-Stock/1762652579.5907981", + "retrieved_timestamp": "1762652579.590799", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/remember_to_breathe-8b-Model-Stock", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/remember_to_breathe-8b-Model-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7104150321147887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411654435599922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json b/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json new file mode 100644 index 000000000..ae885b201 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_test/1762652579.5910451", + "retrieved_timestamp": "1762652579.5910459", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/test", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49369450834895856 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371873804638203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json b/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json new file mode 100644 index 000000000..d57995380 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_test_ALT/1762652579.591327", + "retrieved_timestamp": "1762652579.591328", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/test_ALT", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/test_ALT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.499689712185889 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370433315307738 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4362916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3492353723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json b/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json new file mode 100644 index 000000000..ca0495026 --- /dev/null +++ b/data/hfopenllm_v2/DreadPoor/tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_tests_pending-do_not_use_yet/1762652579.591608", + "retrieved_timestamp": "1762652579.591609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/tests_pending-do_not_use_yet", + "developer": "DreadPoor", + "inference_platform": "unknown", + "id": "DreadPoor/tests_pending-do_not_use_yet" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7691414336183549 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5407897873885027 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40047916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38272938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json b/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json new file mode 100644 index 000000000..356f857f5 --- /dev/null +++ b/data/hfopenllm_v2/ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/1762652579.5918348", + "retrieved_timestamp": "1762652579.591836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", + "developer": "ECE-ILAB-PRYMMAL", + "inference_platform": "unknown", + "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40289432040319684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5401935891431586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43321875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38605385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json b/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json new file mode 100644 index 000000000..335566455 --- /dev/null +++ b/data/hfopenllm_v2/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1762652579.592541", + "retrieved_timestamp": "1762652579.592542", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", + "developer": "Edgerunners", + "inference_platform": "unknown", + "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7147114101694614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4979908369885237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36361369680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json b/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json new file mode 100644 index 000000000..88531211e --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1.4b/1762652579.593903", + "retrieved_timestamp": "1762652579.593904", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-1.4b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-1.4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23708094522533543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.315042649740714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 1.515 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json b/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json new file mode 100644 index 000000000..8f888f322 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-12b/1762652579.5942001", + "retrieved_timestamp": "1762652579.594201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-12b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24714756845170813 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179653957935337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11087101063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json b/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json new file mode 100644 index 000000000..41f5ef5a3 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-160m/1762652579.5944068", + "retrieved_timestamp": "1762652579.594408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-160m", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-160m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18155161637787737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2970437484241321 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4179375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11195146276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json b/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json new file mode 100644 index 000000000..be335ed36 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1b/1762652579.594618", + "retrieved_timestamp": "1762652579.594618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-1b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-1b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2207941594968018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3004093017564394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35520833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11361369680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 1.079 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json b/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json new file mode 100644 index 000000000..4d8b20a0c --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-2.8b/1762652579.594833", + "retrieved_timestamp": "1762652579.5948339", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-2.8b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-2.8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21732226049105263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3224085936276087 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11369680851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.909 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json b/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json new file mode 100644 index 000000000..6699e12e9 --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-410m/1762652579.5950441", + "retrieved_timestamp": "1762652579.595045", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-410m", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-410m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21954525104500505 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.302813387064426 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35781250000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json b/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json new file mode 100644 index 000000000..8acb3bcdc --- /dev/null +++ b/data/hfopenllm_v2/EleutherAI/pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-6.9b/1762652579.595358", + "retrieved_timestamp": "1762652579.595359", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/pythia-6.9b", + "developer": "EleutherAI", + "inference_platform": "unknown", + "id": "EleutherAI/pythia-6.9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22811362739752744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3232287869322383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3590520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 6.9 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json b/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json new file mode 100644 index 000000000..fedade8dc --- /dev/null +++ b/data/hfopenllm_v2/EnnoAi/EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-7B-French-Instruct-202502/1762652579.596549", + "retrieved_timestamp": "1762652579.59655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi/EnnoAi-7B-French-Instruct-202502", + "developer": "EnnoAi", + "inference_platform": "unknown", + "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5564424615575562 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574545199388612 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723564954682779 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45997916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4013464095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json b/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json new file mode 100644 index 000000000..bc455d788 --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Epiculous_Azure_Dusk-v0.2/1762652579.5970619", + "retrieved_timestamp": "1762652579.5970628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Epiculous/Azure_Dusk-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "id": "Epiculous/Azure_Dusk-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.346715603487635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4119721873553597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3834583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3034408244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json b/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json new file mode 100644 index 000000000..b65e80934 --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Epiculous_Crimson_Dawn-v0.2/1762652579.5973198", + "retrieved_timestamp": "1762652579.5973198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Epiculous/Crimson_Dawn-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "id": "Epiculous/Crimson_Dawn-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3103454389907667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44823796489645434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4151770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27210771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json b/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json new file mode 100644 index 000000000..d68250ff8 --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Epiculous_NovaSpark/1762652579.597535", + "retrieved_timestamp": "1762652579.597536", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Epiculous/NovaSpark", + "developer": "Epiculous", + "inference_platform": "unknown", + "id": "Epiculous/NovaSpark" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6408473960203371 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5063958663768304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3881979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648603723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json b/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json new file mode 100644 index 000000000..f9ef64fdb --- /dev/null +++ b/data/hfopenllm_v2/Epiculous/Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Epiculous_Violet_Twilight-v0.2/1762652579.597749", + "retrieved_timestamp": "1762652579.59775", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Epiculous/Violet_Twilight-v0.2", + "developer": "Epiculous", + "inference_platform": "unknown", + "id": "Epiculous/Violet_Twilight-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45317756885064964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4614552476845888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02870090634441088 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42993750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3110871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json b/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json new file mode 100644 index 000000000..d54a01d07 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepPhi-3.5-mini-instruct/1762652579.5991712", + "retrieved_timestamp": "1762652579.599172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/DeepPhi-3.5-mini-instruct", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/DeepPhi-3.5-mini-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1325915238234551 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28822860667627487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2332214765100671 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36562500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11028922872340426 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json b/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json new file mode 100644 index 000000000..feac98594 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_FineLlama3.1-8B-Instruct/1762652579.5997", + "retrieved_timestamp": "1762652579.599701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/FineLlama3.1-8B-Instruct", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/FineLlama3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08000992921005155 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45573635384163325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3481666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3112533244680851 + } + } + ], + "additional_details": { + "precision": "4bit", + "architecture": "?", + "params_billions": 14.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json b/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json new file mode 100644 index 000000000..a3bec4c83 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B/1762652579.59992", + "retrieved_timestamp": "1762652579.59992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-12B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1833501775289565 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110893652548262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42363541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3343583776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json new file mode 100644 index 000000000..d7c6ece5e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/1762652579.600397", + "retrieved_timestamp": "1762652579.600397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4577243934981405 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4838398624677178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39445833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35829454787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json new file mode 100644 index 000000000..d680108c8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/1762652579.601048", + "retrieved_timestamp": "1762652579.6010492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44318630123627534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4823644760491404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4066458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3515625 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json new file mode 100644 index 000000000..35487c5e8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.6022642", + "retrieved_timestamp": "1762652579.6022651", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7207066140063919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610092915501656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3432395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3353557180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json new file mode 100644 index 000000000..efd0ead2d --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.601946", + "retrieved_timestamp": "1762652579.6019468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7304984108831234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46492466713692354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32088541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34798869680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json new file mode 100644 index 000000000..89ea0a504 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1762652579.60172", + "retrieved_timestamp": "1762652579.601721", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.669099101495144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4668070143164938 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34178125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33892952127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json new file mode 100644 index 000000000..9f28c1467 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/1762652579.601493", + "retrieved_timestamp": "1762652579.601493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5975334335119704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4904191122627008 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40103125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34225398936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json new file mode 100644 index 000000000..6d128b326 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/1762652579.6012669", + "retrieved_timestamp": "1762652579.601268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457339858242796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48973199216860547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37622916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3543051861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json new file mode 100644 index 000000000..5213420b8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/1762652579.6025012", + "retrieved_timestamp": "1762652579.6025019", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4578241288669619 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4760520079608936 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3881354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3470744680851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json new file mode 100644 index 000000000..a9a5f3042 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/1762652579.602742", + "retrieved_timestamp": "1762652579.6027431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7204816553411615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4817795525811035 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35480385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json new file mode 100644 index 000000000..bc1dc68fe --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/1762652579.602981", + "retrieved_timestamp": "1762652579.6029818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46229559790245434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49829504320793055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33311170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json new file mode 100644 index 000000000..67c67047e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/1762652579.60321", + "retrieved_timestamp": "1762652579.603211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46109655713506825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48010141537970213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3998229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35206117021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json b/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json new file mode 100644 index 000000000..d26513a5e --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/1762652579.603439", + "retrieved_timestamp": "1762652579.60344", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18607295309778055 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49677687590350894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33527260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json b/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json new file mode 100644 index 000000000..4e763c824 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/1762652579.6045282", + "retrieved_timestamp": "1762652579.604529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06946790072563022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364928342081372 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42921875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32962101063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json new file mode 100644 index 000000000..00987b14c --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/1762652579.6049678", + "retrieved_timestamp": "1762652579.6049678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7100903380807368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46279874531423665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3194895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33111702127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json new file mode 100644 index 000000000..018d57906 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/1762652579.605414", + "retrieved_timestamp": "1762652579.6054149", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.712213593265868 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45659361690861294 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32348958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33502327127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json new file mode 100644 index 000000000..9e0d4955a --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/1762652579.605665", + "retrieved_timestamp": "1762652579.6056662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6915306941138402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4524732961901791 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32903922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json new file mode 100644 index 000000000..ed8de8fc9 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/1762652579.606377", + "retrieved_timestamp": "1762652579.606377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40871443325930756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3324495305251265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11785239361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json new file mode 100644 index 000000000..5b768a6f9 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/1762652579.606596", + "retrieved_timestamp": "1762652579.6065972", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3272816127874041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3262818751942827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11727061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json new file mode 100644 index 000000000..a22033ab2 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/1762652579.6070201", + "retrieved_timestamp": "1762652579.607021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7289746760816855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45181862491313 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json new file mode 100644 index 000000000..19c3ea30d --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/1762652579.606812", + "retrieved_timestamp": "1762652579.606813", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5119538380386264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43810846923178864 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34352083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2789228723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json new file mode 100644 index 000000000..088a1f9c8 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/1762652579.60724", + "retrieved_timestamp": "1762652579.607241", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5902893212232432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.436379591348482 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3314270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28233045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json new file mode 100644 index 000000000..7647636dd --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-0/1762652579.60745", + "retrieved_timestamp": "1762652579.60745", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-0", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7341454008696924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44460707451155984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35539583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172373670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json new file mode 100644 index 000000000..cf9aa82c7 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/1762652579.607657", + "retrieved_timestamp": "1762652579.607658", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7334960128015887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44496323889512146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31441156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json new file mode 100644 index 000000000..7831b3f28 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-R01/1762652579.607871", + "retrieved_timestamp": "1762652579.607872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-R01", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-R01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29760590787998065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43725189001258497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31945833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25914228723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json new file mode 100644 index 000000000..7c98fbfb0 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2/1762652579.6080902", + "retrieved_timestamp": "1762652579.6080909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-RE1-V2", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7393161256576994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44623884450165807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31806848404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json new file mode 100644 index 000000000..2345c24c0 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2A/1762652579.608308", + "retrieved_timestamp": "1762652579.608309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-RE1-V2A", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5732534120577845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4189899823502799 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33520833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2736037234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json new file mode 100644 index 000000000..2996bac5b --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2B/1762652579.60862", + "retrieved_timestamp": "1762652579.6086211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-RE1-V2B", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5051097753959495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41678877951897175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3448229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26728723404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json new file mode 100644 index 000000000..7ed9cce0d --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2C/1762652579.608856", + "retrieved_timestamp": "1762652579.6088572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-RE1-V2C", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057092957796425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41774567831526244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2691156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json new file mode 100644 index 000000000..9bdff3b22 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1-V1/1762652579.609117", + "retrieved_timestamp": "1762652579.609117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-T1-V1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-T1-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7207564816908026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4516908992961786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31200132978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json new file mode 100644 index 000000000..c302de06d --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI/ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1_1/1762652579.609335", + "retrieved_timestamp": "1762652579.6093361", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/ReasoningCore-3B-T1_1", + "developer": "EpistemeAI", + "inference_platform": "unknown", + "id": "EpistemeAI/ReasoningCore-3B-T1_1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274509412802475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45239424517060806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3553645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3116688829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json new file mode 100644 index 000000000..ea71ead47 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-12B-v1.2/1762652579.609813", + "retrieved_timestamp": "1762652579.609814", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-12B-v1.2", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-12B-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13553925805750963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5018583230653281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4173125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33369348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json new file mode 100644 index 000000000..b4930a65b --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/1762652579.610973", + "retrieved_timestamp": "1762652579.6109738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4865756193566404 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48807730539009225 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3931875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3614527925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json new file mode 100644 index 000000000..c149180bc --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/1762652579.61236", + "retrieved_timestamp": "1762652579.612361", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5515465631191904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48075580310342053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36925 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3420046542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json new file mode 100644 index 000000000..0cc466f16 --- /dev/null +++ b/data/hfopenllm_v2/EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/1762652579.6125782", + "retrieved_timestamp": "1762652579.612579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", + "developer": "EpistemeAI2", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4633195476890207 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4790834283312441 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37743750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564660904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json b/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json new file mode 100644 index 000000000..1d0067649 --- /dev/null +++ b/data/hfopenllm_v2/Eric111/CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo-DPO/1762652579.613287", + "retrieved_timestamp": "1762652579.613288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eric111/CatunaMayo-DPO", + "developer": "Eric111", + "inference_platform": "unknown", + "id": "Eric111/CatunaMayo-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4214539643700936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5223991323844243 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44503125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3169880319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json b/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json new file mode 100644 index 000000000..bc34dab4f --- /dev/null +++ b/data/hfopenllm_v2/Eric111/CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo/1762652579.613048", + "retrieved_timestamp": "1762652579.613049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eric111/CatunaMayo", + "developer": "Eric111", + "inference_platform": "unknown", + "id": "Eric111/CatunaMayo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4074156571231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5243635518600797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45398958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json new file mode 100644 index 000000000..472ac8459 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/1762652579.613742", + "retrieved_timestamp": "1762652579.613743", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", + "developer": "Etherll", + "inference_platform": "unknown", + "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37399322686028624 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410649663618229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4649375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39777260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json new file mode 100644 index 000000000..8064de8af --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/1762652579.613493", + "retrieved_timestamp": "1762652579.613494", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", + "developer": "Etherll", + "inference_platform": "unknown", + "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724694920588483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410649663618229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4649375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39777260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json b/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json new file mode 100644 index 000000000..a87095004 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/1762652579.61485", + "retrieved_timestamp": "1762652579.614851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", + "developer": "Etherll", + "inference_platform": "unknown", + "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5005385709916355 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4895144464043051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3503158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json b/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json new file mode 100644 index 000000000..7e1dc1709 --- /dev/null +++ b/data/hfopenllm_v2/Etherll/SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_SuperHermes/1762652579.615286", + "retrieved_timestamp": "1762652579.615287", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/SuperHermes", + "developer": "Etherll", + "inference_platform": "unknown", + "id": "Etherll/SuperHermes" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5459015412438996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5289531792679852 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44004166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39486369680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json b/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json new file mode 100644 index 000000000..439d59adc --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Chocolatine-Fusion-14B/1762652579.615752", + "retrieved_timestamp": "1762652579.615752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/Chocolatine-Fusion-14B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/Chocolatine-Fusion-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6949028577507679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.64132285324613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49402083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261801861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.367 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json b/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json new file mode 100644 index 000000000..8f2b82f31 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_L3-8B/1762652579.615993", + "retrieved_timestamp": "1762652579.615993", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/L3-8B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/L3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7517309627344335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4985585187130108 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38283333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36394614361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json b/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json new file mode 100644 index 000000000..a78f891cc --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Q-Small-3B/1762652579.616768", + "retrieved_timestamp": "1762652579.61677", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/Q-Small-3B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/Q-Small-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145345461154182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43185314557630744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40054166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27900598404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json b/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json new file mode 100644 index 000000000..fe73df295 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_QwQ-Buddy-32B-Alpha/1762652579.617035", + "retrieved_timestamp": "1762652579.617036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/QwQ-Buddy-32B-Alpha", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/QwQ-Buddy-32B-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34464221598691475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.642442234274039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5059895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294215425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 19.662 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json b/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json new file mode 100644 index 000000000..04017fd31 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_RomboUltima-32B/1762652579.6173398", + "retrieved_timestamp": "1762652579.617341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/RomboUltima-32B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/RomboUltima-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6671509372908327 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6938448333620042 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5385196374622356 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4836354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.578873005319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.645 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json b/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json new file mode 100644 index 000000000..c94d143a5 --- /dev/null +++ b/data/hfopenllm_v2/FINGU-AI/Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Ultimos-32B/1762652579.617578", + "retrieved_timestamp": "1762652579.617579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/Ultimos-32B", + "developer": "FINGU-AI", + "inference_platform": "unknown", + "id": "FINGU-AI/Ultimos-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1592197591280026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2905531373728777 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32860416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 9.604 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json b/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json new file mode 100644 index 000000000..979995a7c --- /dev/null +++ b/data/hfopenllm_v2/FallenMerick/Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FallenMerick_Chewy-Lemon-Cookie-11B/1762652579.6178062", + "retrieved_timestamp": "1762652579.6178071", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FallenMerick/Chewy-Lemon-Cookie-11B", + "developer": "FallenMerick", + "inference_platform": "unknown", + "id": "FallenMerick/Chewy-Lemon-Cookie-11B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4875242135312083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5251122307375103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45455208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3267121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json b/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json new file mode 100644 index 000000000..ba4f2d907 --- /dev/null +++ b/data/hfopenllm_v2/Felladrin/Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Felladrin_Llama-160M-Chat-v1/1762652579.618279", + "retrieved_timestamp": "1762652579.61828", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Felladrin/Llama-160M-Chat-v1", + "developer": "Felladrin", + "inference_platform": "unknown", + "id": "Felladrin/Llama-160M-Chat-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15754642127333254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30360811146348365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11361369680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.162 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json b/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json new file mode 100644 index 000000000..1c5a1d6c3 --- /dev/null +++ b/data/hfopenllm_v2/Felladrin/Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Felladrin_Minueza-32M-UltraChat/1762652579.6187", + "retrieved_timestamp": "1762652579.6187022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Felladrin/Minueza-32M-UltraChat", + "developer": "Felladrin", + "inference_platform": "unknown", + "id": "Felladrin/Minueza-32M-UltraChat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13756277787381924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2941478734048925 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37418749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11328125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.033 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json b/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json new file mode 100644 index 000000000..256a65501 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.618947", + "retrieved_timestamp": "1762652579.618948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30832191917445706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3323387445789459 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14976728723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json new file mode 100644 index 000000000..dd47a9c7c --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.619225", + "retrieved_timestamp": "1762652579.6192262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.509730847484674 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214989784123593 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43095833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37691156914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json new file mode 100644 index 000000000..406cf79d5 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619448", + "retrieved_timestamp": "1762652579.6194491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28154408081667753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3305518729746925 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15408909574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json b/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json new file mode 100644 index 000000000..05e04bde5 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619661", + "retrieved_timestamp": "1762652579.619661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3015775919006015 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33246082656550385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3408229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14852061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json b/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json new file mode 100644 index 000000000..4824ff032 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619875", + "retrieved_timestamp": "1762652579.6198761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28693976426991497 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33465340701604496 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15550199468085107 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json new file mode 100644 index 000000000..09772d0cb --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb/1762652579.620099", + "retrieved_timestamp": "1762652579.6201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14845388014911545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2917939408206228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35806249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json new file mode 100644 index 000000000..9820bbe98 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/1762652579.620331", + "retrieved_timestamp": "1762652579.620332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15537329840379083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3066426145674803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11427859042553191 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json new file mode 100644 index 000000000..801378377 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/1762652579.62055", + "retrieved_timestamp": "1762652579.6205509", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14678054229444543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29317781029884354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11569148936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json new file mode 100644 index 000000000..77be3598c --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb/1762652579.620773", + "retrieved_timestamp": "1762652579.620773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15809607397261488 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29409841468035297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10762965425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json new file mode 100644 index 000000000..8f23adca0 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/1762652579.6209762", + "retrieved_timestamp": "1762652579.620977", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.157771379938563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29496212100634955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36999999999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11394614361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json new file mode 100644 index 000000000..2afcb49f4 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/1762652579.6211882", + "retrieved_timestamp": "1762652579.6211882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15847063569107744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29604672415652145 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11643949468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json new file mode 100644 index 000000000..47ea25b72 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb/1762652579.6213892", + "retrieved_timestamp": "1762652579.62139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17638089158987041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2921781950918249 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1079621010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json new file mode 100644 index 000000000..842e04193 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/1762652579.621598", + "retrieved_timestamp": "1762652579.621599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17066051410258115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2992388897714206 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3939375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11045545212765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json new file mode 100644 index 000000000..8cd9a2adf --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/1762652579.621813", + "retrieved_timestamp": "1762652579.621814", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15384956360235286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.291672957517483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37406249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11369680851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json new file mode 100644 index 000000000..4f70179c1 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/1762652579.622025", + "retrieved_timestamp": "1762652579.622026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14747979804695985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30287372123209483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11195146276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json new file mode 100644 index 000000000..b6e838c03 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/1762652579.622248", + "retrieved_timestamp": "1762652579.622248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13451530827094332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2927186496606003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json new file mode 100644 index 000000000..2cf1def1e --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb/1762652579.62247", + "retrieved_timestamp": "1762652579.62247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1511267880335288 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29723404576965046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11627327127659574 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json new file mode 100644 index 000000000..d17ad513f --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/1762652579.622689", + "retrieved_timestamp": "1762652579.62269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.155648124753432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3048804422828362 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38599999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11377992021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json new file mode 100644 index 000000000..db63b0194 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1762652579.62291", + "retrieved_timestamp": "1762652579.622911", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15842076800666677 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2925171720555518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38199999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1157746010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json new file mode 100644 index 000000000..3e1fc5d93 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb/1762652579.623165", + "retrieved_timestamp": "1762652579.6231658", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16391618682872555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3013718229200533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38085416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json new file mode 100644 index 000000000..e24b2382d --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/1762652579.623383", + "retrieved_timestamp": "1762652579.623384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16414114549395603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30001678726257036 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json new file mode 100644 index 000000000..21689ed65 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/1762652579.623598", + "retrieved_timestamp": "1762652579.623599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16059389087620846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2983444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3846354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11619015957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json new file mode 100644 index 000000000..3495f58cc --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb/1762652579.623817", + "retrieved_timestamp": "1762652579.623818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16414114549395603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29594449748780255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11519281914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json new file mode 100644 index 000000000..4d2efbda2 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/1762652579.62404", + "retrieved_timestamp": "1762652579.6240408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1622927166584662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3038096660271284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3992708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11377992021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json new file mode 100644 index 000000000..052ad6c7a --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/1762652579.624255", + "retrieved_timestamp": "1762652579.624256", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14742993036254914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2942808065535252 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11303191489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json b/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json new file mode 100644 index 000000000..f7bd1b3ff --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_smollm2_pretrained_200k_fineweb/1762652579.624471", + "retrieved_timestamp": "1762652579.624471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/smollm2_pretrained_200k_fineweb", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/smollm2_pretrained_200k_fineweb" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15270039051937748 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.299468427221449 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11594082446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json b/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json new file mode 100644 index 000000000..c07e5af67 --- /dev/null +++ b/data/hfopenllm_v2/FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.6246889", + "retrieved_timestamp": "1762652579.6246898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "developer": "FlofloB", + "inference_platform": "unknown", + "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.521546164177715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5240829189778252 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42441666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3720910904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json b/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json new file mode 100644 index 000000000..c4c2972c9 --- /dev/null +++ b/data/hfopenllm_v2/FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/1762652579.624908", + "retrieved_timestamp": "1762652579.6249092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", + "developer": "FuJhen", + "inference_platform": "unknown", + "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420041046645123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47730323895548116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2956283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json b/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json new file mode 100644 index 000000000..efbdfe34b --- /dev/null +++ b/data/hfopenllm_v2/FuJhen/mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral-instruct-7B-DPO/1762652579.625171", + "retrieved_timestamp": "1762652579.625172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuJhen/mistral-instruct-7B-DPO", + "developer": "FuJhen", + "inference_platform": "unknown", + "id": "FuJhen/mistral-instruct-7B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49684171332065585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46239050561386214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30335771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.496 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json b/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json new file mode 100644 index 000000000..4eec917d1 --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-7B-v2.0/1762652579.625878", + "retrieved_timestamp": "1762652579.625879", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseAI/FuseChat-7B-v2.0", + "developer": "FuseAI", + "inference_platform": "unknown", + "id": "FuseAI/FuseChat-7B-v2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3423194900641409 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954212795868764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4796666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162400265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json new file mode 100644 index 000000000..b13fd775c --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.1-8B-Instruct/1762652579.626143", + "retrieved_timestamp": "1762652579.626144", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7204816553411615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5119887898349903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38200000000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37333776595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json new file mode 100644 index 000000000..5f3629f26 --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.2-3B-Instruct/1762652579.626356", + "retrieved_timestamp": "1762652579.626357", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.684886102208806 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46583679221755164 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39139583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31316489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json b/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json new file mode 100644 index 000000000..85c5c73db --- /dev/null +++ b/data/hfopenllm_v2/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1762652579.626579", + "retrieved_timestamp": "1762652579.626579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", + "developer": "FuseAI", + "inference_platform": "unknown", + "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5905641475728844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.552599883615556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4561933534743202 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41181848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json b/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json new file mode 100644 index 000000000..84e651f7a --- /dev/null +++ b/data/hfopenllm_v2/GalrionSoftworks/MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MN-LooseCannon-12B-v1/1762652579.626794", + "retrieved_timestamp": "1762652579.626794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GalrionSoftworks/MN-LooseCannon-12B-v1", + "developer": "GalrionSoftworks", + "inference_platform": "unknown", + "id": "GalrionSoftworks/MN-LooseCannon-12B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417791459992819 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128183808679557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3195644946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json b/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json new file mode 100644 index 000000000..a31127a1a --- /dev/null +++ b/data/hfopenllm_v2/GalrionSoftworks/MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MagnusIntellectus-12B-v1/1762652579.62705", + "retrieved_timestamp": "1762652579.627051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GalrionSoftworks/MagnusIntellectus-12B-v1", + "developer": "GalrionSoftworks", + "inference_platform": "unknown", + "id": "GalrionSoftworks/MagnusIntellectus-12B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4421368635221213 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5323010476246133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34208776595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json b/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json new file mode 100644 index 000000000..562320e4a --- /dev/null +++ b/data/hfopenllm_v2/GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/1762652579.628178", + "retrieved_timestamp": "1762652579.628178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", + "developer": "GoToCompany", + "inference_platform": "unknown", + "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6550607942481504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5954551751157878 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4778645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263630319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json b/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json new file mode 100644 index 000000000..3d9a9493e --- /dev/null +++ b/data/hfopenllm_v2/GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/1762652579.628486", + "retrieved_timestamp": "1762652579.628489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", + "developer": "GoToCompany", + "inference_platform": "unknown", + "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523844510343666 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951292004509417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44884375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3453291223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json new file mode 100644 index 000000000..9bc0ceeac --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.6293938", + "retrieved_timestamp": "1762652579.629396", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3416944817528602 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32921013057720044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.002265861027190332 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16381316489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json new file mode 100644 index 000000000..055a032a4 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.629041", + "retrieved_timestamp": "1762652579.629042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.347189900574919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32683063456958195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16414561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json new file mode 100644 index 000000000..7615c6355 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/1762652579.629639", + "retrieved_timestamp": "1762652579.6296399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47685806992114255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.418600731531926 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27825797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json new file mode 100644 index 000000000..57edbee28 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/1762652579.629877", + "retrieved_timestamp": "1762652579.629878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421553699738915 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40418921704436744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37685416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25615026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json new file mode 100644 index 000000000..b1f8629d0 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/1762652579.630181", + "retrieved_timestamp": "1762652579.6301818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42525055740989465 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4053446177133173 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37018749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25556848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json new file mode 100644 index 000000000..1d5b274df --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/1762652579.6304152", + "retrieved_timestamp": "1762652579.630416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8291666112581284 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6355637424320617 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5422960725075529 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5018284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json new file mode 100644 index 000000000..a19e25a03 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.630644", + "retrieved_timestamp": "1762652579.630645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7813811797142693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309672164610734 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43539583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4119847074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json new file mode 100644 index 000000000..2714c553c --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/1762652579.631213", + "retrieved_timestamp": "1762652579.631215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41883092417009093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41242101633634826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2554853723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json new file mode 100644 index 000000000..3546b84c2 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-3b-v6.0/1762652579.631514", + "retrieved_timestamp": "1762652579.6315148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/josie-3b-v6.0", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/josie-3b-v6.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6009554648333089 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4496147842264783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2938066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.386125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32197473404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json new file mode 100644 index 000000000..5c185a0a0 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.6322381", + "retrieved_timestamp": "1762652579.632239", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7597740661444966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510712680636641 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45393750000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4011801861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json new file mode 100644 index 000000000..4d912cb62 --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.632", + "retrieved_timestamp": "1762652579.632001", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7627716680629618 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097811950503962 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45793750000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40325797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json new file mode 100644 index 000000000..e7aa100cc --- /dev/null +++ b/data/hfopenllm_v2/Goekdeniz-Guelmez/josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0/1762652579.631763", + "retrieved_timestamp": "1762652579.631764", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Goekdeniz-Guelmez/josie-7b-v6.0", + "developer": "Goekdeniz-Guelmez", + "inference_platform": "unknown", + "id": "Goekdeniz-Guelmez/josie-7b-v6.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7411645544931892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5104855208094123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41539583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806515957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json b/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json new file mode 100644 index 000000000..61f9284c1 --- /dev/null +++ b/data/hfopenllm_v2/GreenNode/GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GreenNode_GreenNode-small-9B-it/1762652579.6324449", + "retrieved_timestamp": "1762652579.632446", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GreenNode/GreenNode-small-9B-it", + "developer": "GreenNode", + "inference_platform": "unknown", + "id": "GreenNode/GreenNode-small-9B-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7436125037123721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.599383874005197 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17447129909365558 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42041666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3927027925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json b/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json new file mode 100644 index 000000000..e9bc8277c --- /dev/null +++ b/data/hfopenllm_v2/GritLM/GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GritLM_GritLM-7B-KTO/1762652579.632807", + "retrieved_timestamp": "1762652579.632808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GritLM/GritLM-7B-KTO", + "developer": "GritLM", + "inference_platform": "unknown", + "id": "GritLM/GritLM-7B-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5310132670203948 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.485293719684692 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37102083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26803523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json b/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json new file mode 100644 index 000000000..fb1384089 --- /dev/null +++ b/data/hfopenllm_v2/GritLM/GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GritLM_GritLM-8x7B-KTO/1762652579.633089", + "retrieved_timestamp": "1762652579.633089", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GritLM/GritLM-8x7B-KTO", + "developer": "GritLM", + "inference_platform": "unknown", + "id": "GritLM/GritLM-8x7B-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5714049832222946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5820304362331497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36477726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json new file mode 100644 index 000000000..89ae80fb2 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.5-12b-Nemo/1762652579.633812", + "retrieved_timestamp": "1762652579.633813", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gryphe/Pantheon-RP-1.5-12b-Nemo", + "developer": "Gryphe", + "inference_platform": "unknown", + "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47630841722186024 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519582216884963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44203125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3302027925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json new file mode 100644 index 000000000..1b71efd90 --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/1762652579.634284", + "retrieved_timestamp": "1762652579.634285", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", + "developer": "Gryphe", + "inference_platform": "unknown", + "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4636187537954849 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5276980814125921 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4247916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json new file mode 100644 index 000000000..bd1320b2b --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo/1762652579.634059", + "retrieved_timestamp": "1762652579.6340601", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo", + "developer": "Gryphe", + "inference_platform": "unknown", + "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44805671174705336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204007434392454 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4287604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33111702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json b/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json new file mode 100644 index 000000000..8d4e7e18b --- /dev/null +++ b/data/hfopenllm_v2/Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/1762652579.6344929", + "retrieved_timestamp": "1762652579.6344929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", + "developer": "Gryphe", + "inference_platform": "unknown", + "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6931042965996888 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5304537230538597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37647916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39419880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json new file mode 100644 index 000000000..e0c5e6e20 --- /dev/null +++ b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.63471", + "retrieved_timestamp": "1762652579.634711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", + "developer": "GuilhermeNaturaUmana", + "inference_platform": "unknown", + "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4985405391029136 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5644838945274894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25755287009063443 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44290226063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json new file mode 100644 index 000000000..933bf263c --- /dev/null +++ b/data/hfopenllm_v2/GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.634963", + "retrieved_timestamp": "1762652579.634964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", + "developer": "GuilhermeNaturaUmana", + "inference_platform": "unknown", + "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47910654840268263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5648715950622487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4439166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4408244680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json new file mode 100644 index 000000000..c13f5a5cb --- /dev/null +++ b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1.2/1762652579.6374269", + "retrieved_timestamp": "1762652579.637428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HarbingerX/Zeitgeist-3b-V1.2", + "developer": "HarbingerX", + "inference_platform": "unknown", + "id": "HarbingerX/Zeitgeist-3b-V1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6754189993661264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4440650477102142 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35790625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30560172872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json new file mode 100644 index 000000000..7eaa7ffd8 --- /dev/null +++ b/data/hfopenllm_v2/HarbingerX/Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1/1762652579.637166", + "retrieved_timestamp": "1762652579.6371672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HarbingerX/Zeitgeist-3b-V1", + "developer": "HarbingerX", + "inference_platform": "unknown", + "id": "HarbingerX/Zeitgeist-3b-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6711724889958643 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4440790761237121 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3009474734042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json b/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json new file mode 100644 index 000000000..eaaad28c8 --- /dev/null +++ b/data/hfopenllm_v2/Hastagaras/L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Hastagaras_L3.2-JametMini-3B-MK.III/1762652579.6376362", + "retrieved_timestamp": "1762652579.6376371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hastagaras/L3.2-JametMini-3B-MK.III", + "developer": "Hastagaras", + "inference_platform": "unknown", + "id": "Hastagaras/L3.2-JametMini-3B-MK.III" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6182662003484088 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45385245294894094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2982878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json b/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json new file mode 100644 index 000000000..7695ed0f0 --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HelpingAI_Cipher-20B/1762652579.638349", + "retrieved_timestamp": "1762652579.63835", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI/Cipher-20B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "id": "HelpingAI/Cipher-20B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5377575942942504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6032432743536918 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40029166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3744182180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 20.551 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json b/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json new file mode 100644 index 000000000..af7d5be7d --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HelpingAI_Dhanishtha-Large/1762652579.638597", + "retrieved_timestamp": "1762652579.638598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI/Dhanishtha-Large", + "developer": "HelpingAI", + "inference_platform": "unknown", + "id": "HelpingAI/Dhanishtha-Large" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24567370133468086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46036539145861094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38451041666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2755152925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json b/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json new file mode 100644 index 000000000..a2bdd9951 --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-10B/1762652579.638817", + "retrieved_timestamp": "1762652579.638818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI/Priya-10B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "id": "HelpingAI/Priya-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40429283190822574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4441457310476767 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3792708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24925199468085107 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json b/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json new file mode 100644 index 000000000..491e056ff --- /dev/null +++ b/data/hfopenllm_v2/HelpingAI/Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-3B/1762652579.639023", + "retrieved_timestamp": "1762652579.639024", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HelpingAI/Priya-3B", + "developer": "HelpingAI", + "inference_platform": "unknown", + "id": "HelpingAI/Priya-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4525780484669566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3961184863327844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23387632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json new file mode 100644 index 000000000..58383a59a --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-alpha/1762652579.640769", + "retrieved_timestamp": "1762652579.64077", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceH4/zephyr-7b-alpha", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "id": "HuggingFaceH4/zephyr-7b-alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191480826429429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45828635059044115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3949583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2795046542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json new file mode 100644 index 000000000..737d808d1 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-beta/1762652579.641025", + "retrieved_timestamp": "1762652579.641026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceH4/zephyr-7b-beta", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "id": "HuggingFaceH4/zephyr-7b-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49504315216957673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.431582191918003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3925416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2780917553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json b/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json new file mode 100644 index 000000000..f2336f38b --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/1762652579.641484", + "retrieved_timestamp": "1762652579.641485", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "developer": "HuggingFaceH4", + "inference_platform": "unknown", + "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6510891102275296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6290439728524093 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4465208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4586103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json new file mode 100644 index 000000000..d491265c8 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B-Instruct/1762652579.641991", + "retrieved_timestamp": "1762652579.641991", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-1.7B-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-1.7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23478259905938464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28851114363217695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json new file mode 100644 index 000000000..8a24a576a --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B/1762652579.6417458", + "retrieved_timestamp": "1762652579.6417458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-1.7B", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-1.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23615673080759053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3180516538964782 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34209375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11477726063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json new file mode 100644 index 000000000..a3a8be93f --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M-Instruct/1762652579.642397", + "retrieved_timestamp": "1762652579.6423979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-135M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12140121544169469 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30150816789978757 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36345833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11760305851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json new file mode 100644 index 000000000..ef56ece1f --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M/1762652579.642195", + "retrieved_timestamp": "1762652579.642196", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-135M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-135M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21247622973709757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3046054260062988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4366041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11220079787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.13 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json new file mode 100644 index 000000000..5a94fe9cf --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M-Instruct/1762652579.642821", + "retrieved_timestamp": "1762652579.642821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-360M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19516549422199764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28851114363217695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34717708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json new file mode 100644 index 000000000..d2462466b --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M/1762652579.642613", + "retrieved_timestamp": "1762652579.6426141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM-360M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM-360M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2133505764704318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30645160333152527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40178125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json new file mode 100644 index 000000000..8aea07392 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B-Instruct/1762652579.643299", + "retrieved_timestamp": "1762652579.6433", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-1.7B-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367835121920947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3598617531415158 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2053690159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json new file mode 100644 index 000000000..6e79543b3 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B/1762652579.6430368", + "retrieved_timestamp": "1762652579.643038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-1.7B", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-1.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2440003634800108 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3452594377166261 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2137632978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.71 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json new file mode 100644 index 000000000..b84ca446f --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.644038", + "retrieved_timestamp": "1762652579.644039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-135M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05925167444602544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31347502947335903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23406040268456377 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10920877659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json new file mode 100644 index 000000000..3f8103f38 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.643796", + "retrieved_timestamp": "1762652579.643796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-135M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-135M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2883138960181208 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3124321328066677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36621875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11145279255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json new file mode 100644 index 000000000..4b67a7664 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M/1762652579.643546", + "retrieved_timestamp": "1762652579.6435468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-135M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-135M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18177657504310785 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3044234246877141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4111770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10945811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json new file mode 100644 index 000000000..b491ae17e --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.644474", + "retrieved_timestamp": "1762652579.644475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-360M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08303191088533979 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3052703401844317 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34228125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json new file mode 100644 index 000000000..8390cf0fe --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.6446972", + "retrieved_timestamp": "1762652579.6446981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-360M-Instruct", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-360M-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38415958545548035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31435050538888504 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.346125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json new file mode 100644 index 000000000..d951d5a80 --- /dev/null +++ b/data/hfopenllm_v2/HuggingFaceTB/SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M/1762652579.6442492", + "retrieved_timestamp": "1762652579.6442502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceTB/SmolLM2-360M", + "developer": "HuggingFaceTB", + "inference_platform": "unknown", + "id": "HuggingFaceTB/SmolLM2-360M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21145227995053123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3233478044302361 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3954270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11693816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.36 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json b/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json new file mode 100644 index 000000000..4b5a70e0b --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-LLama3-8B-Instruct/1762652579.6448839", + "retrieved_timestamp": "1762652579.644885", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HumanLLMs/Humanish-LLama3-8B-Instruct", + "developer": "HumanLLMs", + "inference_platform": "unknown", + "id": "HumanLLMs/Humanish-LLama3-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6497903340913221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49677096627896544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35815624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37017952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json b/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json new file mode 100644 index 000000000..1f457a233 --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/1762652579.6451478", + "retrieved_timestamp": "1762652579.645149", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", + "developer": "HumanLLMs", + "inference_platform": "unknown", + "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5451269298793867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261780772532613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39676041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35206117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json b/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json new file mode 100644 index 000000000..0e791bf41 --- /dev/null +++ b/data/hfopenllm_v2/HumanLLMs/Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/1762652579.645365", + "retrieved_timestamp": "1762652579.645366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", + "developer": "HumanLLMs", + "inference_platform": "unknown", + "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7284250233824031 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363681457807072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4398271276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json new file mode 100644 index 000000000..f4372cd85 --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-1/1762652579.6473012", + "retrieved_timestamp": "1762652579.647302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Intel/neural-chat-7b-v3-1", + "developer": "Intel", + "inference_platform": "unknown", + "id": "Intel/neural-chat-7b-v3-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4686897432146704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5051565464054848 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49789583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2677859042553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json new file mode 100644 index 000000000..08e7574a2 --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-2/1762652579.647583", + "retrieved_timestamp": "1762652579.647584", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Intel/neural-chat-7b-v3-2", + "developer": "Intel", + "inference_platform": "unknown", + "id": "Intel/neural-chat-7b-v3-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4988397452093778 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5032226831964403 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48952083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26670545212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json new file mode 100644 index 000000000..55271e9a4 --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-3/1762652579.6477928", + "retrieved_timestamp": "1762652579.647794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Intel/neural-chat-7b-v3-3", + "developer": "Intel", + "inference_platform": "unknown", + "id": "Intel/neural-chat-7b-v3-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4762585495374495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48766180524289693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4859583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2624667553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json b/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json new file mode 100644 index 000000000..84cbcc05b --- /dev/null +++ b/data/hfopenllm_v2/Intel/neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3/1762652579.646828", + "retrieved_timestamp": "1762652579.6468291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Intel/neural-chat-7b-v3", + "developer": "Intel", + "inference_platform": "unknown", + "id": "Intel/neural-chat-7b-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27779735546128714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048316221363103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5054895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26986369680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json new file mode 100644 index 000000000..7bbd2573f --- /dev/null +++ b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.5/1762652579.648252", + "retrieved_timestamp": "1762652579.648252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Invalid-Null/PeiYangMe-0.5", + "developer": "Invalid-Null", + "inference_platform": "unknown", + "id": "Invalid-Null/PeiYangMe-0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14088507382633633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27907748194216614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37381249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11087101063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json new file mode 100644 index 000000000..0faf507c2 --- /dev/null +++ b/data/hfopenllm_v2/Invalid-Null/PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.7/1762652579.648521", + "retrieved_timestamp": "1762652579.648522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Invalid-Null/PeiYangMe-0.7", + "developer": "Invalid-Null", + "inference_platform": "unknown", + "id": "Invalid-Null/PeiYangMe-0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1491032682172192 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30275310145886614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2332214765100671 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38571874999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11012300531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json new file mode 100644 index 000000000..bffaabc96 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.648735", + "retrieved_timestamp": "1762652579.648736", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2552660274737696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4724973116620121 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3654375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3316156914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json new file mode 100644 index 000000000..d854eea01 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.6489909", + "retrieved_timestamp": "1762652579.648992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2476972211509905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4758066295235124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3641041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32920545212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json new file mode 100644 index 000000000..7b3a4d427 --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.649409", + "retrieved_timestamp": "1762652579.64941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7841039552830933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5310923599182072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47205438066465255 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43539583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4128158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json new file mode 100644 index 000000000..9ba116a2c --- /dev/null +++ b/data/hfopenllm_v2/Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/1762652579.6491818", + "retrieved_timestamp": "1762652579.649183", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", + "developer": "Isaak-Carter", + "inference_platform": "unknown", + "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7317473193349202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396376284460921 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json b/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json new file mode 100644 index 000000000..c67d593c2 --- /dev/null +++ b/data/hfopenllm_v2/J-LAB/Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/J-LAB_Thynk_orpo/1762652579.649622", + "retrieved_timestamp": "1762652579.6496232", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "J-LAB/Thynk_orpo", + "developer": "J-LAB", + "inference_platform": "unknown", + "id": "J-LAB/Thynk_orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21017788357114678 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44631138778709606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45147916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32313829787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json b/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json new file mode 100644 index 000000000..aa365b17d --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Casual-Magnum-34B/1762652579.65033", + "retrieved_timestamp": "1762652579.6503308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Casual-Magnum-34B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Casual-Magnum-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19301675110927893 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6032046880542974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5183676861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json new file mode 100644 index 000000000..c0b5bf2af --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/1762652579.6505952", + "retrieved_timestamp": "1762652579.6505961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38798166642286913 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.518546209727402 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42804166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271276595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json new file mode 100644 index 000000000..3989ce320 --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/1762652579.650832", + "retrieved_timestamp": "1762652579.650833", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3636019095998617 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5209417299963208 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43197916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32721077127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json new file mode 100644 index 000000000..af4a11e7b --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-4x7B/1762652579.651071", + "retrieved_timestamp": "1762652579.651072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Proto-Athena-4x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Proto-Athena-4x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37029636918930664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5106547638742905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43477083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32064494680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json b/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json new file mode 100644 index 000000000..b9d97bcdb --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-v0.2-4x7B/1762652579.651291", + "retrieved_timestamp": "1762652579.6512918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Proto-Athena-v0.2-4x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Proto-Athena-v0.2-4x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37524213531208306 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5067731005424964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42128125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json new file mode 100644 index 000000000..dac4e9f49 --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/1762652579.651509", + "retrieved_timestamp": "1762652579.65151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4904719477652628 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5186849053052595 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44496874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33011968085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json new file mode 100644 index 000000000..0d64a109d --- /dev/null +++ b/data/hfopenllm_v2/Jacoby746/Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Spark-v0.1-7B/1762652579.651721", + "retrieved_timestamp": "1762652579.651722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", + "developer": "Jacoby746", + "inference_platform": "unknown", + "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43326928106313467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4735771808296548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43166666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30693151595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json new file mode 100644 index 000000000..ea513cdb8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-1epoch/1762652579.651926", + "retrieved_timestamp": "1762652579.651926", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-DPO-1epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-DPO-1epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26473313031644924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31907502434278595 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33517708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15575132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json new file mode 100644 index 000000000..5ed4c54e5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-5epoch/1762652579.65218", + "retrieved_timestamp": "1762652579.652181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-DPO-5epoch", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-DPO-5epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25701472094043804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3112109544868782 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15325797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json new file mode 100644 index 000000000..c54954968 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/1762652579.653574", + "retrieved_timestamp": "1762652579.653575", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24687274210206694 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3260313037664168 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06495468277945618 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34336458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json new file mode 100644 index 000000000..780b927ee --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1762652579.6538298", + "retrieved_timestamp": "1762652579.6538298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2605863553150086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3308028437367363 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16256648936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json new file mode 100644 index 000000000..31939aed0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/1762652579.654063", + "retrieved_timestamp": "1762652579.6540642", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2529178136234081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261949089625076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15757978723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json new file mode 100644 index 000000000..b4c1ff825 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/1762652579.65331", + "retrieved_timestamp": "1762652579.653311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27677340567472086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3253697801563151 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15201130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json new file mode 100644 index 000000000..bf9ed72a2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/1762652579.654743", + "retrieved_timestamp": "1762652579.6547441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2140498322229462 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172227797719337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34727083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15367353723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json new file mode 100644 index 000000000..1966e91ff --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/1762652579.6549618", + "retrieved_timestamp": "1762652579.654963", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22573992561957826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3064261556890236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36606249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15317486702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json new file mode 100644 index 000000000..81e0cdb92 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/1762652579.655172", + "retrieved_timestamp": "1762652579.655173", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19868726091215752 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31044747322019184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15575132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json new file mode 100644 index 000000000..d90084860 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/1762652579.654527", + "retrieved_timestamp": "1762652579.654527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2019596891802639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017092819749249 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3446354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16190159574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json new file mode 100644 index 000000000..df967183f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/1762652579.655605", + "retrieved_timestamp": "1762652579.655606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19706379074189817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3224699194774388 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1651429521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json new file mode 100644 index 000000000..5ccf89b42 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/1762652579.655815", + "retrieved_timestamp": "1762652579.6558158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2241164554493189 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32468117082421427 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3353333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16888297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json new file mode 100644 index 000000000..33f1c3627 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/1762652579.656047", + "retrieved_timestamp": "1762652579.656048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22918744486850445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3259343389530942 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3235208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16879986702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json new file mode 100644 index 000000000..ee3093249 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/1762652579.6553931", + "retrieved_timestamp": "1762652579.655394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1985875255433361 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139860294769257 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1697972074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json new file mode 100644 index 000000000..adac30fbe --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/1762652579.656457", + "retrieved_timestamp": "1762652579.656457", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18307535117931534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29839616748934167 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1484375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json new file mode 100644 index 000000000..66821f8bf --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/1762652579.656671", + "retrieved_timestamp": "1762652579.656672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1989620872617987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3109875129533253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3449479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14162234042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json new file mode 100644 index 000000000..872b24037 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/1762652579.656877", + "retrieved_timestamp": "1762652579.656878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18971994308434953 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936418449815176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38739583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13364361702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json new file mode 100644 index 000000000..6e55d96ce --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/1762652579.656255", + "retrieved_timestamp": "1762652579.656256", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2034335562972912 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2935549587263229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14128989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json new file mode 100644 index 000000000..b011fb41e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/1762652579.6580968", + "retrieved_timestamp": "1762652579.658098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24105262924595627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31671815484837784 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json new file mode 100644 index 000000000..10ec81914 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/1762652579.658342", + "retrieved_timestamp": "1762652579.6583428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23685598656010498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3260038632940968 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3355208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15699800531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json new file mode 100644 index 000000000..7a6c3be96 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/1762652579.6585789", + "retrieved_timestamp": "1762652579.65858", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22623971063444992 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261540051256346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3408229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15408909574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json new file mode 100644 index 000000000..10973e16c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/1762652579.6588218", + "retrieved_timestamp": "1762652579.658823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25079455843827714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3199331515135054 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33545833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15550199468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json new file mode 100644 index 000000000..63a1ad557 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/1762652579.6591082", + "retrieved_timestamp": "1762652579.659109", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.238979241745236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31816042712158116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15600066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json new file mode 100644 index 000000000..eea17ecf3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/1762652579.6593359", + "retrieved_timestamp": "1762652579.659337", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2423015376977531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3154080373582542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15475398936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json new file mode 100644 index 000000000..2cfbbd2f5 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/1762652579.6595562", + "retrieved_timestamp": "1762652579.659557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24932069132124984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3189717077702392 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15608377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json new file mode 100644 index 000000000..c6c104c71 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/1762652579.660001", + "retrieved_timestamp": "1762652579.660005", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2541667220752049 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31671883869615397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32885416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15799534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json new file mode 100644 index 000000000..1e10943dc --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/1762652579.660342", + "retrieved_timestamp": "1762652579.660342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24507418095098782 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3159533058861391 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15608377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json new file mode 100644 index 000000000..665c31d06 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/1762652579.6605709", + "retrieved_timestamp": "1762652579.6605718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25574032456105356 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31419826948787827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3315208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json new file mode 100644 index 000000000..934cd9bb3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/1762652579.660821", + "retrieved_timestamp": "1762652579.660822", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26053648763059795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3166968072745491 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15766289893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json new file mode 100644 index 000000000..75b1ca096 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/1762652579.661046", + "retrieved_timestamp": "1762652579.661047", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25781371206177384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31732037273750646 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32879166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1583277925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json new file mode 100644 index 000000000..08b3069ad --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/1762652579.661258", + "retrieved_timestamp": "1762652579.661259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23353369060758786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197619098572027 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348994 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32755208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1580784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json new file mode 100644 index 000000000..2d14bdae9 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/1762652579.6614761", + "retrieved_timestamp": "1762652579.6614761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24719743613611883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32262707839652854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15375664893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json new file mode 100644 index 000000000..f3e887267 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/1762652579.661691", + "retrieved_timestamp": "1762652579.661692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2474223948013493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32291208173140107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32748958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15392287234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json new file mode 100644 index 000000000..70d5aec59 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/1762652579.6619039", + "retrieved_timestamp": "1762652579.6619048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24027801788144343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32453683161596314 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1573304521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json new file mode 100644 index 000000000..661a50206 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/1762652579.662116", + "retrieved_timestamp": "1762652579.662117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23680611887569425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3224293761524927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33548958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15159574468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json new file mode 100644 index 000000000..e2c666a3c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/1762652579.662327", + "retrieved_timestamp": "1762652579.662327", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23718068059415687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32477052921998556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3394270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1550033244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json new file mode 100644 index 000000000..6dbffccf7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/1762652579.6625469", + "retrieved_timestamp": "1762652579.662548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24992021170494289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31806007750183346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15741356382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json new file mode 100644 index 000000000..44d7b9796 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/1762652579.662755", + "retrieved_timestamp": "1762652579.662755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23810489501190177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32421844512358233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3328229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15724734042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json new file mode 100644 index 000000000..ed1719615 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/1762652579.662969", + "retrieved_timestamp": "1762652579.662969", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2420765790325226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3224798177796032 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3408229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14960106382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json new file mode 100644 index 000000000..7304968d3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/1762652579.663178", + "retrieved_timestamp": "1762652579.663179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23805502732749106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32652003776870003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34079166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14985039893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json new file mode 100644 index 000000000..83441688d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/1762652579.663386", + "retrieved_timestamp": "1762652579.663386", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25264298727376694 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3176911636441555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15724734042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json new file mode 100644 index 000000000..91f3781a8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/1762652579.6636329", + "retrieved_timestamp": "1762652579.6636338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24572356901909154 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.316045450978746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15716422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json new file mode 100644 index 000000000..df7b41e16 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/1762652579.663875", + "retrieved_timestamp": "1762652579.663876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2441998342176536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3193544697854515 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json new file mode 100644 index 000000000..68a2f4285 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/1762652579.6641018", + "retrieved_timestamp": "1762652579.664103", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26036139664977814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31784656431310543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15674867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json new file mode 100644 index 000000000..36b43cb3e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/1762652579.664332", + "retrieved_timestamp": "1762652579.664333", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24899599728719796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172899997448431 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15691489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json new file mode 100644 index 000000000..8d12dd0b0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/1762652579.6645608", + "retrieved_timestamp": "1762652579.664562", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26036139664977814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149566664115098 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15658244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json new file mode 100644 index 000000000..71ca238e0 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/1762652579.664829", + "retrieved_timestamp": "1762652579.66483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550410688085391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3211026993947845 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32876041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15708111702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json new file mode 100644 index 000000000..a44d8dc1f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/1762652579.665046", + "retrieved_timestamp": "1762652579.665047", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24779695651981187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197773660515741 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33145833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15866023936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json new file mode 100644 index 000000000..c31a79566 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/1762652579.6652648", + "retrieved_timestamp": "1762652579.665266", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24747226248576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32246983072126806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15558510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json new file mode 100644 index 000000000..80e4a73e8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/1762652579.665471", + "retrieved_timestamp": "1762652579.665472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2590127528291599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3185132309797721 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15857712765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json new file mode 100644 index 000000000..def76622b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/1762652579.665683", + "retrieved_timestamp": "1762652579.665684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23233464984020177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179474145066817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15475398936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json new file mode 100644 index 000000000..6c72674f8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/1762652579.665889", + "retrieved_timestamp": "1762652579.66589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23151017079127825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3259705145690442 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3383125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15209441489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json new file mode 100644 index 000000000..99799658c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1762652579.666097", + "retrieved_timestamp": "1762652579.6660979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2298368329366082 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33204616486918276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33288541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15674867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json new file mode 100644 index 000000000..293f57050 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1762652579.666312", + "retrieved_timestamp": "1762652579.666313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24687274210206694 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178544697854515 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json new file mode 100644 index 000000000..56db8cb81 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/1762652579.666527", + "retrieved_timestamp": "1762652579.666527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2520434668900739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3167822100533442 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3328229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15757978723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json new file mode 100644 index 000000000..78987f399 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/1762652579.6667368", + "retrieved_timestamp": "1762652579.666738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2665815591519391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3190675981811982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32885416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json new file mode 100644 index 000000000..4de917bf9 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/1762652579.667231", + "retrieved_timestamp": "1762652579.667236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24992021170494289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31779941873624934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json new file mode 100644 index 000000000..3d3080b24 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1762652579.6676302", + "retrieved_timestamp": "1762652579.6676311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24170201731406002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178391594145879 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json new file mode 100644 index 000000000..a4f4ad5fd --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/1762652579.66787", + "retrieved_timestamp": "1762652579.667871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2562401095759252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31904280434381205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15757978723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json new file mode 100644 index 000000000..67331d716 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/1762652579.6680949", + "retrieved_timestamp": "1762652579.6680949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2408276705807258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31647277641099675 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3315208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1556682180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json new file mode 100644 index 000000000..618840afd --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/1762652579.668304", + "retrieved_timestamp": "1762652579.668305", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24812165055386376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3204166266783764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15915890957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json new file mode 100644 index 000000000..8c0f6f95c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/1762652579.668525", + "retrieved_timestamp": "1762652579.6685262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2544914161092568 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3185709286639082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32885416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15608377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json new file mode 100644 index 000000000..275cfeeed --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/1762652579.668755", + "retrieved_timestamp": "1762652579.668756", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2519935992056632 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.320368681472897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15375664893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json new file mode 100644 index 000000000..5fb7abe0c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/1762652579.6689868", + "retrieved_timestamp": "1762652579.668988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23146030310686755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32128474090743103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15824468085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json new file mode 100644 index 000000000..eca99f8af --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1762652579.669204", + "retrieved_timestamp": "1762652579.669204", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25149381419079153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31867127828365593 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32888541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15392287234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json new file mode 100644 index 000000000..4c7ae83da --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/1762652579.669419", + "retrieved_timestamp": "1762652579.66942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24719743613611883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213274785812292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15882646276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json new file mode 100644 index 000000000..ec9e8d468 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/1762652579.6696231", + "retrieved_timestamp": "1762652579.669624", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24599839536873275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32337658694524307 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15334109042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json new file mode 100644 index 000000000..74d17a636 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1762652579.669835", + "retrieved_timestamp": "1762652579.669836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25236816092412573 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255638228201855 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33679166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15309175531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json new file mode 100644 index 000000000..c17027af3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/1762652579.670048", + "retrieved_timestamp": "1762652579.670049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2264646692996804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3252098558034601 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1568317819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json new file mode 100644 index 000000000..209d0c973 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/1762652579.6702561", + "retrieved_timestamp": "1762652579.6702569", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23016152697066006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3224479825736107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34079166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15001662234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json new file mode 100644 index 000000000..1a86e6808 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/1762652579.670511", + "retrieved_timestamp": "1762652579.6705122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25236816092412573 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3278027492189594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33945833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15209441489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json new file mode 100644 index 000000000..4542ead2c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/1762652579.67072", + "retrieved_timestamp": "1762652579.67072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2657570801030156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31752113645211816 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json new file mode 100644 index 000000000..b253d3bca --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1762652579.6709208", + "retrieved_timestamp": "1762652579.6709208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2487211709375568 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3189091360416723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json new file mode 100644 index 000000000..7e0155335 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1762652579.671123", + "retrieved_timestamp": "1762652579.671123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2560151509106947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158776856286612 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15616688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json new file mode 100644 index 000000000..8bd27e8eb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/1762652579.671335", + "retrieved_timestamp": "1762652579.671336", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2498703440205322 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31561997255280577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15558510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json new file mode 100644 index 000000000..8d08a071a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/1762652579.671542", + "retrieved_timestamp": "1762652579.6715431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.249595517670891 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31774285416798703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json new file mode 100644 index 000000000..a1b8934c6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/1762652579.6717582", + "retrieved_timestamp": "1762652579.6717582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25149381419079153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172338500122228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15533577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json new file mode 100644 index 000000000..087019ad6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/1762652579.6576698", + "retrieved_timestamp": "1762652579.657671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2201447714286981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217197270809481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33669791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17096077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json new file mode 100644 index 000000000..61d07356b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/1762652579.671975", + "retrieved_timestamp": "1762652579.671975", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22808813946993975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3239538094779519 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17461768617021275 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json new file mode 100644 index 000000000..1b753d0fc --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/1762652579.672395", + "retrieved_timestamp": "1762652579.672396", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25259311958935626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.323809171214906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15741356382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json new file mode 100644 index 000000000..8b7bde603 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/1762652579.672603", + "retrieved_timestamp": "1762652579.6726038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24812165055386376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31748404240871353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34752083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15965757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json new file mode 100644 index 000000000..67e79002e --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1762652579.672818", + "retrieved_timestamp": "1762652579.672818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25476624245889795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3199073234678175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34348958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15616688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json new file mode 100644 index 000000000..e24417834 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/1762652579.673032", + "retrieved_timestamp": "1762652579.673033", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2423015376977531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32193163799444524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35152083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15633311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json new file mode 100644 index 000000000..f1ccd1501 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/1762652579.673239", + "retrieved_timestamp": "1762652579.67324", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24927082363683917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3190945593427599 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34752083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15915890957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json new file mode 100644 index 000000000..46a3812ff --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1762652579.673455", + "retrieved_timestamp": "1762652579.6734562", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24779695651981187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3218405915852565 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35152083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15558510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json new file mode 100644 index 000000000..421ff332f --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/1762652579.6721878", + "retrieved_timestamp": "1762652579.6721878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23478259905938464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33076056644270485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34088541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16954787234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json new file mode 100644 index 000000000..0ca0debd8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/1762652579.65739", + "retrieved_timestamp": "1762652579.657391", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2067558522498083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3203968601167082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16780252659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json new file mode 100644 index 000000000..c1d18d78d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/1762652579.673873", + "retrieved_timestamp": "1762652579.673873", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21747186354428472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179879277889672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33679166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16273271276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json new file mode 100644 index 000000000..473340d14 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1762652579.674078", + "retrieved_timestamp": "1762652579.674078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2198699450790569 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32974820176156994 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1651429521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json new file mode 100644 index 000000000..80a496166 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/1762652579.674291", + "retrieved_timestamp": "1762652579.6742918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2077299343519639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275980298873716 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15866023936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json new file mode 100644 index 000000000..7141974c3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/1762652579.673672", + "retrieved_timestamp": "1762652579.673672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2009856070781083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31093810553451656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33809375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16722074468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json new file mode 100644 index 000000000..ce79c904c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/1762652579.674706", + "retrieved_timestamp": "1762652579.674707", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2156234347087949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100411318318588 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15674867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json new file mode 100644 index 000000000..ce4501728 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/1762652579.674919", + "retrieved_timestamp": "1762652579.674919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23805502732749106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3199313632207049 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3553645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15217752659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json new file mode 100644 index 000000000..56616abcf --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/1762652579.6751308", + "retrieved_timestamp": "1762652579.6751318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21197644472222593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32002953673668666 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37127083333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1628158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json new file mode 100644 index 000000000..f9a64b5bb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/1762652579.6744971", + "retrieved_timestamp": "1762652579.674498", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20925366915340185 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158179005969299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33669791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1622340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json new file mode 100644 index 000000000..2766fefd6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/1762652579.67534", + "retrieved_timestamp": "1762652579.6753411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20245947419513555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.326814314271471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3209166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13297872340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json new file mode 100644 index 000000000..83916ef7b --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/1762652579.675586", + "retrieved_timestamp": "1762652579.6755872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1964144026737944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32925816453885065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13372672872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json new file mode 100644 index 000000000..0045cf2a6 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT/1762652579.654298", + "retrieved_timestamp": "1762652579.6542988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen2.5-0.5B-SFT", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen2.5-0.5B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19636453498938372 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31207478976310743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3394270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16730385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json new file mode 100644 index 000000000..5ee0d0204 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/1762652579.675801", + "retrieved_timestamp": "1762652579.675801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25324250765746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3140431891367934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33145833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15658244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json new file mode 100644 index 000000000..d31dd6c2a --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/1762652579.676018", + "retrieved_timestamp": "1762652579.676018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26695612087040166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3188575312560274 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32879166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json new file mode 100644 index 000000000..94ca4f4eb --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/1762652579.6762261", + "retrieved_timestamp": "1762652579.6762261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24807178286945303 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32608064671010917 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15649933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json new file mode 100644 index 000000000..4a11f8df7 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/1762652579.67643", + "retrieved_timestamp": "1762652579.6764312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23832985367713222 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32184656431310543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15034906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json new file mode 100644 index 000000000..bd0090610 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/1762652579.676642", + "retrieved_timestamp": "1762652579.6766431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24714756845170813 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32244323308961736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33276041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15334109042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json new file mode 100644 index 000000000..af75d0a66 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/1762652579.6768441", + "retrieved_timestamp": "1762652579.676845", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24474948691693596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181429193838813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15649933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json new file mode 100644 index 000000000..927e5c502 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/1762652579.677048", + "retrieved_timestamp": "1762652579.6770492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2551408041773605 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3194064593640778 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json new file mode 100644 index 000000000..cf74787a4 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/1762652579.677404", + "retrieved_timestamp": "1762652579.677407", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25379216035674235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31530652457997205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.326125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1583277925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json new file mode 100644 index 000000000..87b63916c --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/1762652579.677789", + "retrieved_timestamp": "1762652579.67779", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24022815019703275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3168335157841944 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1568317819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json new file mode 100644 index 000000000..86e0177a8 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/1762652579.678058", + "retrieved_timestamp": "1762652579.67806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24839647690350491 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3210570160312575 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1573304521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json new file mode 100644 index 000000000..61be6349d --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/1762652579.6783109", + "retrieved_timestamp": "1762652579.6783118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25781371206177384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32030958605054793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32885416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1583277925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json new file mode 100644 index 000000000..14ecc8903 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/1762652579.678605", + "retrieved_timestamp": "1762652579.678606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23163539408768735 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3258499805340021 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.322125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15799534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json new file mode 100644 index 000000000..b47e93966 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/1762652579.678855", + "retrieved_timestamp": "1762652579.678856", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23598163982677073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3225125170893353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1595744680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json new file mode 100644 index 000000000..19e07b6e2 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/1762652579.679086", + "retrieved_timestamp": "1762652579.679086", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23370878158840763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3132229900705577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3235208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15325797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json new file mode 100644 index 000000000..7704fcced --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/1762652579.6792939", + "retrieved_timestamp": "1762652579.679295", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25693936532843964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32760017293049276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3155833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15649933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json new file mode 100644 index 000000000..74fedd383 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/1762652579.679507", + "retrieved_timestamp": "1762652579.679507", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24599839536873275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32674094707635526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3209166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15433843085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json new file mode 100644 index 000000000..7f7e027ac --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1762652579.679712", + "retrieved_timestamp": "1762652579.679712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2529178136234081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32292563083414066 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3195208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15965757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json new file mode 100644 index 000000000..3effa9e78 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/1762652579.6799219", + "retrieved_timestamp": "1762652579.679923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25046986440422525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255735108237258 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3194895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15990691489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json new file mode 100644 index 000000000..a0f590982 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/1762652579.680135", + "retrieved_timestamp": "1762652579.680136", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2387044153955948 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3258394284267221 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31685416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1589095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json new file mode 100644 index 000000000..0473c0734 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/1762652579.6803432", + "retrieved_timestamp": "1762652579.6803432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25324250765746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32182747858122923 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32085416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15932513297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json new file mode 100644 index 000000000..b0aecbaa3 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1762652579.680558", + "retrieved_timestamp": "1762652579.6805592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24562383365027018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3299192088381941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.318125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16015625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json new file mode 100644 index 000000000..4e154fe94 --- /dev/null +++ b/data/hfopenllm_v2/JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/1762652579.680775", + "retrieved_timestamp": "1762652579.680775", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", + "developer": "JayHyeon", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24225167001334236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32712145602920534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.318125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json b/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json new file mode 100644 index 000000000..4bca5680c --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/1762652579.692669", + "retrieved_timestamp": "1762652579.692669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6540368444615842 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.498371102582105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40125000000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686003989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json new file mode 100644 index 000000000..23519410c --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/1762652579.6929338", + "retrieved_timestamp": "1762652579.692935", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6717221416951467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48797965672899357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36336436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json new file mode 100644 index 000000000..dcdbbe169 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.6931531", + "retrieved_timestamp": "1762652579.693154", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6555605792630221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49345840367294164 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json new file mode 100644 index 000000000..d080d381e --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.693368", + "retrieved_timestamp": "1762652579.6933692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6315055164740666 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4916414793938901 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3611203457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json new file mode 100644 index 000000000..8544a40d8 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/1762652579.69359", + "retrieved_timestamp": "1762652579.693591", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6284580468711907 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4986088445592742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40137500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3544714095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json new file mode 100644 index 000000000..c35539c8d --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/1762652579.69381", + "retrieved_timestamp": "1762652579.693811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6677504576745258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4940463886115545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3987083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json new file mode 100644 index 000000000..3d34b669b --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.69404", + "retrieved_timestamp": "1762652579.694041", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6605063453857986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49160075581298046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3664394946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json new file mode 100644 index 000000000..746499d28 --- /dev/null +++ b/data/hfopenllm_v2/Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.694266", + "retrieved_timestamp": "1762652579.6942668", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", + "developer": "Jimmy19991222", + "inference_platform": "unknown", + "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.649190813707629 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4952489348573605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3961354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json b/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json new file mode 100644 index 000000000..c9d621281 --- /dev/null +++ b/data/hfopenllm_v2/JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/1762652579.696794", + "retrieved_timestamp": "1762652579.696794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", + "developer": "JungZoona", + "inference_platform": "unknown", + "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.732396707403024 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7585971930826706 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2862537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41694630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5911041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5884308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json new file mode 100644 index 000000000..933b956d8 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/1762652579.6977122", + "retrieved_timestamp": "1762652579.697713", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6179913739987677 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6650146340680478 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4565416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5242686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json new file mode 100644 index 000000000..dffec2dac --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-500/1762652579.6980212", + "retrieved_timestamp": "1762652579.698022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6105223030448099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6692236023098005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45114583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.522689494680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json new file mode 100644 index 000000000..102799167 --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-final/1762652579.698244", + "retrieved_timestamp": "1762652579.698245", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6164676391973297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6650146340680478 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4565416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5242686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json b/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json new file mode 100644 index 000000000..5d0fd4ebf --- /dev/null +++ b/data/hfopenllm_v2/KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/1762652579.698519", + "retrieved_timestamp": "1762652579.69852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", + "developer": "KSU-HW-SEC", + "inference_platform": "unknown", + "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7238039512936785 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6903120365165111 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32099697885196377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45917708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5251828457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json b/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json new file mode 100644 index 000000000..c8f733aa0 --- /dev/null +++ b/data/hfopenllm_v2/Khetterman/DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Khetterman_DarkAtom-12B-v3/1762652579.6987362", + "retrieved_timestamp": "1762652579.698737", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Khetterman/DarkAtom-12B-v3", + "developer": "Khetterman", + "inference_platform": "unknown", + "id": "Khetterman/DarkAtom-12B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6173419859306639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5153709655381875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3546376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json b/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json new file mode 100644 index 000000000..dd0ae4913 --- /dev/null +++ b/data/hfopenllm_v2/Khetterman/Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Khetterman_Kosmos-8B-v1/1762652579.6990001", + "retrieved_timestamp": "1762652579.699001", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Khetterman/Kosmos-8B-v1", + "developer": "Khetterman", + "inference_platform": "unknown", + "id": "Khetterman/Kosmos-8B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41291107594515886 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5233522858623628 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3918854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366938164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json b/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json new file mode 100644 index 000000000..42d569370 --- /dev/null +++ b/data/hfopenllm_v2/KingNish/Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-0.5b/1762652579.6997252", + "retrieved_timestamp": "1762652579.699726", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/Reasoning-0.5b", + "developer": "KingNish", + "inference_platform": "unknown", + "id": "KingNish/Reasoning-0.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.217421995859874 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33536255853174524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35133333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16414561170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json b/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json new file mode 100644 index 000000000..7f3bee281 --- /dev/null +++ b/data/hfopenllm_v2/Kquant03/CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kquant03_CognitiveFusion2-4x7B-BF16/1762652579.701032", + "retrieved_timestamp": "1762652579.7010329", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kquant03/CognitiveFusion2-4x7B-BF16", + "developer": "Kquant03", + "inference_platform": "unknown", + "id": "Kquant03/CognitiveFusion2-4x7B-BF16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35665700341759865 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41078286111483786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27925531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json b/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json new file mode 100644 index 000000000..3fbd60317 --- /dev/null +++ b/data/hfopenllm_v2/Kquant03/L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kquant03_L3-Pneuma-8B/1762652579.701272", + "retrieved_timestamp": "1762652579.7012732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kquant03/L3-Pneuma-8B", + "developer": "Kquant03", + "inference_platform": "unknown", + "id": "Kquant03/L3-Pneuma-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2374056392593873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49550433176754827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41715624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31840093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json b/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json new file mode 100644 index 000000000..8523783ec --- /dev/null +++ b/data/hfopenllm_v2/Krystalan/DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-14B/1762652579.70148", + "retrieved_timestamp": "1762652579.7014809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Krystalan/DRT-o1-14B", + "developer": "Krystalan", + "inference_platform": "unknown", + "id": "Krystalan/DRT-o1-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067662690549963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.637927537514229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4826283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47951041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5178690159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json b/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json new file mode 100644 index 000000000..2ed883920 --- /dev/null +++ b/data/hfopenllm_v2/Krystalan/DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-7B/1762652579.701715", + "retrieved_timestamp": "1762652579.701716", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Krystalan/DRT-o1-7B", + "developer": "Krystalan", + "inference_platform": "unknown", + "id": "Krystalan/DRT-o1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3928276971768242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5467693339610741 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4478851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50865625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41514295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json b/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json new file mode 100644 index 000000000..f27f4601c --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/1762652579.701928", + "retrieved_timestamp": "1762652579.7019289", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", + "developer": "Kukedlc", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4552509563513699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988446544778517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4281979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824135638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json new file mode 100644 index 000000000..eec2d7492 --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.1/1762652579.7026482", + "retrieved_timestamp": "1762652579.702649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralSynthesis-7B-v0.1", + "developer": "Kukedlc", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralSynthesis-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184563624516283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5144745481048844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43328125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.304936835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json new file mode 100644 index 000000000..8ee26453d --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.3/1762652579.702864", + "retrieved_timestamp": "1762652579.702865", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralSynthesis-7B-v0.3", + "developer": "Kukedlc", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralSynthesis-7B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4078400865259733 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5138078814382175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30501994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json new file mode 100644 index 000000000..2a9b9b758 --- /dev/null +++ b/data/hfopenllm_v2/Kukedlc/NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/1762652579.7030761", + "retrieved_timestamp": "1762652579.703077", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", + "developer": "Kukedlc", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3947259936967247 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5142932549151301 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43324999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3042719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json b/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json new file mode 100644 index 000000000..912957177 --- /dev/null +++ b/data/hfopenllm_v2/Kumar955/Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kumar955_Hemanth-llm/1762652579.703545", + "retrieved_timestamp": "1762652579.703546", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kumar955/Hemanth-llm", + "developer": "Kumar955", + "inference_platform": "unknown", + "id": "Kumar955/Hemanth-llm" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045102550122564 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.522494907014536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4485625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3112533244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json b/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json new file mode 100644 index 000000000..cbc899f7e --- /dev/null +++ b/data/hfopenllm_v2/L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/1762652579.703805", + "retrieved_timestamp": "1762652579.703806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", + "developer": "L-RAGE", + "inference_platform": "unknown", + "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27422572108671656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.422793974567173 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3841354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29247007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json new file mode 100644 index 000000000..257ba6996 --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/1762652579.705025", + "retrieved_timestamp": "1762652579.705025", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7192826145737754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4174432647784512 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30438066465256797 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35771276595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "ExaoneForCausalLM", + "params_billions": 7.8 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json new file mode 100644 index 000000000..261b260a3 --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/1762652579.705282", + "retrieved_timestamp": "1762652579.7052832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7950449252428002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4092347113723405 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3678247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32804188829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 2.405 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json new file mode 100644 index 000000000..d41a58c59 --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/1762652579.705488", + "retrieved_timestamp": "1762652579.705489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8391833668000904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5760913742720142 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38066666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4636801861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 32.003 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json new file mode 100644 index 000000000..18ba8467c --- /dev/null +++ b/data/hfopenllm_v2/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/1762652579.705873", + "retrieved_timestamp": "1762652579.705875", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", + "developer": "LGAI-EXAONE", + "inference_platform": "unknown", + "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8136045692096969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727592304359862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4133144946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "ExaoneForCausalLM", + "params_billions": 7.818 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json b/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json new file mode 100644 index 000000000..4f02e98bd --- /dev/null +++ b/data/hfopenllm_v2/LLM360/K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LLM360_K2-Chat/1762652579.706591", + "retrieved_timestamp": "1762652579.706592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLM360/K2-Chat", + "developer": "LLM360", + "inference_platform": "unknown", + "id": "LLM360/K2-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5151763986223221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5358099630242067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.457 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3371010638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 65.286 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json b/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json new file mode 100644 index 000000000..ab179cbfc --- /dev/null +++ b/data/hfopenllm_v2/LLM360/K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LLM360_K2/1762652579.706215", + "retrieved_timestamp": "1762652579.7062159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLM360/K2", + "developer": "LLM360", + "inference_platform": "unknown", + "id": "LLM360/K2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2252157608478836 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4971835676523677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39799999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30044880319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 65.286 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json b/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json new file mode 100644 index 000000000..a95191edd --- /dev/null +++ b/data/hfopenllm_v2/LLM4Binary/llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LLM4Binary_llm4decompile-1.3b-v2/1762652579.7068748", + "retrieved_timestamp": "1762652579.706877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LLM4Binary/llm4decompile-1.3b-v2", + "developer": "LLM4Binary", + "inference_platform": "unknown", + "id": "LLM4Binary/llm4decompile-1.3b-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22678936333373229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271808417267589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4071770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12092752659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.346 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json b/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json new file mode 100644 index 000000000..21cc4135e --- /dev/null +++ b/data/hfopenllm_v2/Langboat/Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Langboat_Mengzi3-8B-Chat/1762652579.707526", + "retrieved_timestamp": "1762652579.707527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Langboat/Mengzi3-8B-Chat", + "developer": "Langboat", + "inference_platform": "unknown", + "id": "Langboat/Mengzi3-8B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513977357854936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4683725003203179 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31416223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json b/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json new file mode 100644 index 000000000..8146728c6 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBA100/1762652579.707814", + "retrieved_timestamp": "1762652579.707815", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBA100", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBA100" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2075803312987318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2825701502983552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11220079787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json new file mode 100644 index 000000000..00cefef81 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.0/1762652579.708328", + "retrieved_timestamp": "1762652579.708329", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.0", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13511482865463637 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28276697965906106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3525729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.353 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json new file mode 100644 index 000000000..90250844c --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.2/1762652579.708597", + "retrieved_timestamp": "1762652579.708598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.2", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13543952268868825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28112730419661675 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35790625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11053856382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.353 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json new file mode 100644 index 000000000..4dc5a8c3a --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.3/1762652579.70884", + "retrieved_timestamp": "1762652579.7088408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.3", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13543952268868825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28269808045232453 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36190625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.353 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json new file mode 100644 index 000000000..12e48bcfb --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.6/1762652579.7090619", + "retrieved_timestamp": "1762652579.7090628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.6", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5245437660961804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.555356284691385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43684375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45071476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json new file mode 100644 index 000000000..4805c16fd --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.61/1762652579.709277", + "retrieved_timestamp": "1762652579.7092779", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.61", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.61" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5771253607095839 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5548582474785428 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36631419939577037 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4355104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4470578457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json new file mode 100644 index 000000000..61137551f --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.62/1762652579.709492", + "retrieved_timestamp": "1762652579.709493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.62", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.62" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5046099903810778 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5580519941056026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45445478723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json new file mode 100644 index 000000000..09812cd8e --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.63/1762652579.709696", + "retrieved_timestamp": "1762652579.709697", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.63", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.63" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44073835201709244 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5540633758841665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009063444108764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4470578457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json b/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json new file mode 100644 index 000000000..c0798c4c8 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.64/1762652579.709901", + "retrieved_timestamp": "1762652579.709902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1.64", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1.64" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13946107439371977 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27790701865141654 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3446666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11153590425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json b/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json new file mode 100644 index 000000000..5799c2766 --- /dev/null +++ b/data/hfopenllm_v2/Lawnakk/BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1/1762652579.708089", + "retrieved_timestamp": "1762652579.70809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lawnakk/BBALAW1", + "developer": "Lawnakk", + "inference_platform": "unknown", + "id": "Lawnakk/BBALAW1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19054442213327305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28723681696502185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4152708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11211768617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json new file mode 100644 index 000000000..7de403a37 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_A/1762652579.714355", + "retrieved_timestamp": "1762652579.714355", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/CheckPoint_A", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/CheckPoint_A" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45127927233074905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4747699745968042 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28798204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json new file mode 100644 index 000000000..e51118dbe --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_B/1762652579.7146208", + "retrieved_timestamp": "1762652579.714622", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/CheckPoint_B", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/CheckPoint_B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4439852923576111 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47799475378324896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38984375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29072473404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json new file mode 100644 index 000000000..c6971e6f6 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_C/1762652579.714836", + "retrieved_timestamp": "1762652579.714837", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/CheckPoint_C", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/CheckPoint_C" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34768968558979063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45864215446207585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4346145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30211103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json b/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json new file mode 100644 index 000000000..c037bf164 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_R1/1762652579.715039", + "retrieved_timestamp": "1762652579.71504", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/CheckPoint_R1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/CheckPoint_R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17278376928771216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4225419506658359 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22049534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json new file mode 100644 index 000000000..b25a48e9a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_001/1762652579.7152472", + "retrieved_timestamp": "1762652579.715248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/LCARS_AI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/LCARS_AI_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31094495937445976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42578875825590146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43836458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2670378989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json new file mode 100644 index 000000000..0aec10b89 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_1x4_003_SuperAI/1762652579.7154438", + "retrieved_timestamp": "1762652579.715445", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41111251479407973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49198503573704794 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4506145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29720744680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json new file mode 100644 index 000000000..6ccf5e323 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_StarTrek_Computer/1762652579.7157388", + "retrieved_timestamp": "1762652579.715741", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/LCARS_AI_StarTrek_Computer", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/LCARS_AI_StarTrek_Computer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35825609383103496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4446191188748297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24584441489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json b/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json new file mode 100644 index 000000000..9da6bb450 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_TOP_SCORE/1762652579.716028", + "retrieved_timestamp": "1762652579.716029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/LCARS_TOP_SCORE", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/LCARS_TOP_SCORE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706587410293574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5127371051825098 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42928125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3031083776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json b/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json new file mode 100644 index 000000000..f6458571c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_Mixtral_AI_SwahiliTron_7b/1762652579.716297", + "retrieved_timestamp": "1762652579.716299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1533996462718919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3055092453201354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12076130319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json new file mode 100644 index 000000000..263b42755 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI/1762652579.7166212", + "retrieved_timestamp": "1762652579.716622", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWebAI_Human_AGI", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWebAI_Human_AGI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3388221031308041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3374862127508733 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39663541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1478557180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json new file mode 100644 index 000000000..a7ef6fd80 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI_001/1762652579.716855", + "retrieved_timestamp": "1762652579.716856", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWebAI_Human_AGI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWebAI_Human_AGI_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31181930610779396 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3433421938604874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14261968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json new file mode 100644 index 000000000..945c5e8bb --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/1762652579.71707", + "retrieved_timestamp": "1762652579.717071", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15557276914143361 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48107736108561827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2865691489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json new file mode 100644 index 000000000..8681a5aea --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/1762652579.71728", + "retrieved_timestamp": "1762652579.717281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39395138233221183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4888172059118469 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.300531914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json new file mode 100644 index 000000000..8aec5a3d9 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_002/1762652579.71767", + "retrieved_timestamp": "1762652579.7176719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40876430094371824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5043871825389313 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48648958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3058510638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json new file mode 100644 index 000000000..a2e7d3f1c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_001/1762652579.717986", + "retrieved_timestamp": "1762652579.717987", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22516589316347294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33440360243051986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1270777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json new file mode 100644 index 000000000..8adb61d18 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_006/1762652579.718229", + "retrieved_timestamp": "1762652579.71823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_006", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14300832901146734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3301800420981355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11353058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json new file mode 100644 index 000000000..f9e8ec64a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_007/1762652579.718461", + "retrieved_timestamp": "1762652579.718461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_007", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351751131442351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3415665794743605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40962499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13522273936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json new file mode 100644 index 000000000..5fdb2920f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/1762652579.718692", + "retrieved_timestamp": "1762652579.718693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2973310815303395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3306728717792965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1432845744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json new file mode 100644 index 000000000..6329d771b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/1762652579.7189271", + "retrieved_timestamp": "1762652579.7189288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2506948230694557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33363164762455844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41371874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14303523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json new file mode 100644 index 000000000..373b05b48 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/1762652579.719242", + "retrieved_timestamp": "1762652579.719243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3148667757106699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3522609512356862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json new file mode 100644 index 000000000..e061856ae --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/1762652579.719551", + "retrieved_timestamp": "1762652579.719552", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37524213531208306 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39840187861283577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42391666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2018783244680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json new file mode 100644 index 000000000..b6e9a467c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/1762652579.7198021", + "retrieved_timestamp": "1762652579.7198029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4049677079039171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48583341042911066 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2956283244680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json new file mode 100644 index 000000000..49ec3a514 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720226", + "retrieved_timestamp": "1762652579.720227", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30664858131978706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45768864760562744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23179853723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json new file mode 100644 index 000000000..8378dc341 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720018", + "retrieved_timestamp": "1762652579.7200189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30355124403250044 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4575107149412439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42534374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23287898936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json new file mode 100644 index 000000000..a155f454f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/1762652579.7204201", + "retrieved_timestamp": "1762652579.720421", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3065987136353764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158421938604874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34438541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11070478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json new file mode 100644 index 000000000..56fc9fd4a --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.72064", + "retrieved_timestamp": "1762652579.7206411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35788153211257245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4476544560399054 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23761635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json new file mode 100644 index 000000000..21577c849 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.720855", + "retrieved_timestamp": "1762652579.720855", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37976347203198624 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44827466097749213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2388630319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json new file mode 100644 index 000000000..a674eb708 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_RP/1762652579.721039", + "retrieved_timestamp": "1762652579.7210398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2541168543907942 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33230179059744286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1323969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json new file mode 100644 index 000000000..29293bcfb --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/1762652579.7212439", + "retrieved_timestamp": "1762652579.7212448", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062740196013245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33536617928965984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13871343085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json new file mode 100644 index 000000000..764f11cd8 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M1/1762652579.721453", + "retrieved_timestamp": "1762652579.721453", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_HumanAI_M1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_HumanAI_M1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3582062261466243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35632705798398107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36711458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1663065159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json new file mode 100644 index 000000000..65bd79fbb --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M2/1762652579.7216609", + "retrieved_timestamp": "1762652579.721662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_HumanAI_M2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_HumanAI_M2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3750171766468526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39308772552915555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3751458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2010472074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json new file mode 100644 index 000000000..a93e513f2 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M3/1762652579.721856", + "retrieved_timestamp": "1762652579.721857", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/SpydazWeb_HumanAI_M3", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/SpydazWeb_HumanAI_M3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578711153073844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31272572546166244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11486037234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json new file mode 100644 index 000000000..b9d90ecfc --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_12/1762652579.722054", + "retrieved_timestamp": "1762652579.722055", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_12", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_12" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764985793250797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31633960292107943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35815624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11369680851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json new file mode 100644 index 000000000..ffb71597d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_14/1762652579.722256", + "retrieved_timestamp": "1762652579.722257", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_14", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1811770546594148 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2988848127354542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3395208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11394614361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json new file mode 100644 index 000000000..a4a80808f --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/1762652579.72245", + "retrieved_timestamp": "1762652579.722451", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4505046609662362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4609124425176902 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42559375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2734375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json new file mode 100644 index 000000000..962a21159 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/1762652579.7226508", + "retrieved_timestamp": "1762652579.7226508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5306885729863429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4682582050072746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42546875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28939494680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json new file mode 100644 index 000000000..b4eb63e30 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/1762652579.722848", + "retrieved_timestamp": "1762652579.7228491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.478606763387811 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4671769411194033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48689583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2828291223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json new file mode 100644 index 000000000..d62a462cf --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/1762652579.723048", + "retrieved_timestamp": "1762652579.723048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414259719765777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4689417813020516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47197916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27194148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json new file mode 100644 index 000000000..60608b24d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/1762652579.723258", + "retrieved_timestamp": "1762652579.723258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4571492528712705 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48178882135920675 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47784375000000007 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2681183510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json new file mode 100644 index 000000000..c6f0eb77c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/1762652579.723467", + "retrieved_timestamp": "1762652579.723468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6200148938150774 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4755509035158693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42019791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29986702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json new file mode 100644 index 000000000..d58852002 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/1762652579.7236722", + "retrieved_timestamp": "1762652579.7236722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5950854842927876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4927473238025393 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5198229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2999501329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json new file mode 100644 index 000000000..88b3dd72b --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/1762652579.723874", + "retrieved_timestamp": "1762652579.723874", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5735781060918363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48808115770970123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json new file mode 100644 index 000000000..703f7effe --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/1762652579.7241092", + "retrieved_timestamp": "1762652579.7241101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5772250960784053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4805094960871836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5222395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2956283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json new file mode 100644 index 000000000..0ecf40f5d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/1762652579.72431", + "retrieved_timestamp": "1762652579.724311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5817963004827191 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4907982146977475 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29055851063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json new file mode 100644 index 000000000..4d5bb7c80 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/1762652579.7245262", + "retrieved_timestamp": "1762652579.7245262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.546150879665953 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4655028607746287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45108333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28665226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json new file mode 100644 index 000000000..7e3613129 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/1762652579.724742", + "retrieved_timestamp": "1762652579.724742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4923702442851634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46376531085099754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5624583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28897938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json new file mode 100644 index 000000000..af1ee1558 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/1762652579.7249558", + "retrieved_timestamp": "1762652579.724957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033112142448702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4676503002757066 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29130651595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json new file mode 100644 index 000000000..577f533c5 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/1762652579.7251709", + "retrieved_timestamp": "1762652579.7251709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5558429411738631 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47422312505675873 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45098958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2672041223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json new file mode 100644 index 000000000..96e6f8469 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1762652579.725384", + "retrieved_timestamp": "1762652579.725385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5449518388985669 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4650844324968853 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43883333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json new file mode 100644 index 000000000..4a003e762 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/1762652579.7256", + "retrieved_timestamp": "1762652579.725601", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5081572449988254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47965526444811907 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4338125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28449135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json new file mode 100644 index 000000000..b2e272a29 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/1762652579.725811", + "retrieved_timestamp": "1762652579.725811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6039530667517742 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49877449828070924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30244348404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json new file mode 100644 index 000000000..48ee4f7d4 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/1762652579.7260191", + "retrieved_timestamp": "1762652579.72602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.427323944910615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47589342126093026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4231770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2890625 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json new file mode 100644 index 000000000..1ce8b240c --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/1762652579.7262201", + "retrieved_timestamp": "1762652579.726221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5433782364127182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4785559277736029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46953125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29205452127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json new file mode 100644 index 000000000..b572d74c3 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/1762652579.726439", + "retrieved_timestamp": "1762652579.72644", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5426036250482054 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4701061648636955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42013541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28939494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json new file mode 100644 index 000000000..face45a36 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_BIBLE_002/1762652579.72666", + "retrieved_timestamp": "1762652579.7266612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21949538336059432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289070186514165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34069791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13680186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json new file mode 100644 index 000000000..abb87a72e --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatML_002/1762652579.7268748", + "retrieved_timestamp": "1762652579.726876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24122772022677608 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3106383598957094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3623125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10945811170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json new file mode 100644 index 000000000..04e94baff --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA/1762652579.727107", + "retrieved_timestamp": "1762652579.727108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1414591062824417 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32359493837413505 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14752327127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json new file mode 100644 index 000000000..14a58bd00 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA_003/1762652579.727351", + "retrieved_timestamp": "1762652579.7273521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22091938279321088 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3171811407815537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11328125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json new file mode 100644 index 000000000..8c5d3b369 --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_TEMP_/1762652579.7275891", + "retrieved_timestamp": "1762652579.7275898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_TEMP_", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47953097780555587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.495695749059555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3120844414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json new file mode 100644 index 000000000..af98c2e3d --- /dev/null +++ b/data/hfopenllm_v2/LeroyDyer/_Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/1762652579.728002", + "retrieved_timestamp": "1762652579.728004", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", + "developer": "LeroyDyer", + "inference_platform": "unknown", + "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44038817005545283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48909617780536035 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4366041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149933510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json new file mode 100644 index 000000000..8cc529ce4 --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.0/1762652579.7282822", + "retrieved_timestamp": "1762652579.728283", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LightningRodLabs/Flashlight-v1.0", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "id": "LightningRodLabs/Flashlight-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6745446526327921 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6876833310149727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49697885196374625 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41009375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5402260638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json new file mode 100644 index 000000000..0e4b60370 --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.1/1762652579.728596", + "retrieved_timestamp": "1762652579.728597", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LightningRodLabs/Flashlight-v1.1", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "id": "LightningRodLabs/Flashlight-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6720967034136092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6901141327534415 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415558510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json new file mode 100644 index 000000000..e4fb34564 --- /dev/null +++ b/data/hfopenllm_v2/LightningRodLabs/Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.2/1762652579.728818", + "retrieved_timestamp": "1762652579.728819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LightningRodLabs/Flashlight-v1.2", + "developer": "LightningRodLabs", + "inference_platform": "unknown", + "id": "LightningRodLabs/Flashlight-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359920566319587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3264526807518731 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45536458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24850398936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json new file mode 100644 index 000000000..e942db100 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/1762652579.7290292", + "retrieved_timestamp": "1762652579.72903", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5823459531820016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4287069505821554 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2677859042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json new file mode 100644 index 000000000..c7153e4d8 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/1762652579.729285", + "retrieved_timestamp": "1762652579.729285", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5542693386880144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43764741906109417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44816666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2744348404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json new file mode 100644 index 000000000..4c2b872ed --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/1762652579.7297568", + "retrieved_timestamp": "1762652579.7297568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10733742026711349 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30525797550329686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json new file mode 100644 index 000000000..2a4c35a11 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/1762652579.729984", + "retrieved_timestamp": "1762652579.729985", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10733742026711349 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30525797550329686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json new file mode 100644 index 000000000..1067477e2 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/1762652579.730203", + "retrieved_timestamp": "1762652579.730203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22346706738121516 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.357839880712804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4107083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18168218085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json new file mode 100644 index 000000000..f87fcf58b --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP/1762652579.7294989", + "retrieved_timestamp": "1762652579.7294998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5577412376937636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5556642048146725 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3632930513595166 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43960416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45071476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json new file mode 100644 index 000000000..97184e57e --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/1762652579.7304142", + "retrieved_timestamp": "1762652579.730415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2874395492847866 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41904526564708194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39743749999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2925531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json new file mode 100644 index 000000000..480fc05d2 --- /dev/null +++ b/data/hfopenllm_v2/Lil-R/PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/1762652579.7306318", + "retrieved_timestamp": "1762652579.730633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", + "developer": "Lil-R", + "inference_platform": "unknown", + "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1258471965495995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2955092966258663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36314583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json b/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json new file mode 100644 index 000000000..4ccb0fe08 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_10PRYMMAL-3B-slerp/1762652579.7308428", + "retrieved_timestamp": "1762652579.7308428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/10PRYMMAL-3B-slerp", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/10PRYMMAL-3B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1945903535951276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5320377091634505 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14954682779456194 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45290625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3881316489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json b/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json new file mode 100644 index 000000000..f2f226107 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_ECE-1B-merge-PRYMMAL/1762652579.7310941", + "retrieved_timestamp": "1762652579.731095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/ECE-1B-merge-PRYMMAL", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/ECE-1B-merge-PRYMMAL" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27122811916825135 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42345600176908743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3801041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2906416223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json b/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json new file mode 100644 index 000000000..1b0c45574 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_ECE_Finetunning/1762652579.731307", + "retrieved_timestamp": "1762652579.731308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/ECE_Finetunning", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/ECE_Finetunning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04453849120334047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47321596790730514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38394791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3191489361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json b/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json new file mode 100644 index 000000000..dfa2ca042 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-6B-slerp/1762652579.731526", + "retrieved_timestamp": "1762652579.7315269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-6B-slerp", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-6B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11533065599276586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28676215692036117 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1107878989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.293 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json new file mode 100644 index 000000000..94d56548c --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V3/1762652579.731744", + "retrieved_timestamp": "1762652579.731745", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12432346174816154 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957239084980124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json new file mode 100644 index 000000000..22d46842f --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V4/1762652579.731953", + "retrieved_timestamp": "1762652579.7319539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12492298213185458 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957239084980124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json new file mode 100644 index 000000000..372e6d6b2 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V5/1762652579.7321632", + "retrieved_timestamp": "1762652579.7321641", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12492298213185458 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957239084980124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json new file mode 100644 index 000000000..bb8300357 --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V6/1762652579.732379", + "retrieved_timestamp": "1762652579.732379", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12432346174816154 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957239084980124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json new file mode 100644 index 000000000..955a38e0f --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V7/1762652579.732605", + "retrieved_timestamp": "1762652579.732606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12492298213185458 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957239084980124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json b/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json new file mode 100644 index 000000000..5c0530fca --- /dev/null +++ b/data/hfopenllm_v2/LilRg/PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-slerp-Merge/1762652579.732816", + "retrieved_timestamp": "1762652579.732817", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LilRg/PRYMMAL-slerp-Merge", + "developer": "LilRg", + "inference_platform": "unknown", + "id": "LilRg/PRYMMAL-slerp-Merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.304400102838247 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364156271768925 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46347916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3863031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json new file mode 100644 index 000000000..750e9ad21 --- /dev/null +++ b/data/hfopenllm_v2/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/1762652579.733827", + "retrieved_timestamp": "1762652579.733829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", + "developer": "LimYeri", + "inference_platform": "unknown", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6492406813920397 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48526582322240047 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3607916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3353557180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json b/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json new file mode 100644 index 000000000..3167c7e62 --- /dev/null +++ b/data/hfopenllm_v2/Locutusque/CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_CollectiveLM-Falcon-3-7B/1762652579.734693", + "retrieved_timestamp": "1762652579.734694", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/CollectiveLM-Falcon-3-7B", + "developer": "Locutusque", + "inference_platform": "unknown", + "id": "Locutusque/CollectiveLM-Falcon-3-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3918281271470808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5105131374222629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3887291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35987367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json b/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json new file mode 100644 index 000000000..2b59cf1b7 --- /dev/null +++ b/data/hfopenllm_v2/Luni/StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v1/1762652579.736537", + "retrieved_timestamp": "1762652579.7365382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Luni/StarDust-12b-v1", + "developer": "Luni", + "inference_platform": "unknown", + "id": "Luni/StarDust-12b-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5459259210007226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366139363101082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43244791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34117353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json b/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json new file mode 100644 index 000000000..564236d26 --- /dev/null +++ b/data/hfopenllm_v2/Luni/StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v2/1762652579.736784", + "retrieved_timestamp": "1762652579.736785", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Luni/StarDust-12b-v2", + "developer": "Luni", + "inference_platform": "unknown", + "id": "Luni/StarDust-12b-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5628620947973599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5419479534912178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4338125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3439162234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json b/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json new file mode 100644 index 000000000..7b5062a2f --- /dev/null +++ b/data/hfopenllm_v2/Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/1762652579.74142", + "retrieved_timestamp": "1762652579.74142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", + "developer": "Lyte", + "inference_platform": "unknown", + "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7098155117310957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4949521619329585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903323262839879 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.346125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178523936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json b/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json new file mode 100644 index 000000000..6e299d24d --- /dev/null +++ b/data/hfopenllm_v2/Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/1762652579.7416818", + "retrieved_timestamp": "1762652579.741683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", + "developer": "Lyte", + "inference_platform": "unknown", + "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5773503193748144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3515036874279285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32355208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18425864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json b/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json new file mode 100644 index 000000000..40c11e18c --- /dev/null +++ b/data/hfopenllm_v2/MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652579.7424488", + "retrieved_timestamp": "1762652579.7424488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "developer": "MEscriva", + "inference_platform": "unknown", + "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08662903318749807 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.305728612437881 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40171874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11544215425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json b/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json new file mode 100644 index 000000000..bc85947ba --- /dev/null +++ b/data/hfopenllm_v2/MTSAIR/Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MTSAIR_Cotype-Nano/1762652579.742943", + "retrieved_timestamp": "1762652579.742944", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MTSAIR/Cotype-Nano", + "developer": "MTSAIR", + "inference_platform": "unknown", + "id": "MTSAIR/Cotype-Nano" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3747922179816221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3864940969601492 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24767287234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json b/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json new file mode 100644 index 000000000..2b9060f16 --- /dev/null +++ b/data/hfopenllm_v2/MTSAIR/MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MTSAIR_MultiVerse_70B/1762652579.743202", + "retrieved_timestamp": "1762652579.7432032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MTSAIR/MultiVerse_70B", + "developer": "MTSAIR", + "inference_platform": "unknown", + "id": "MTSAIR/MultiVerse_70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5249183278146429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6183134284931178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47398958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48603723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json new file mode 100644 index 000000000..65c689416 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/1762652579.743415", + "retrieved_timestamp": "1762652579.743416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4361416596851908 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4615102744527366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32773958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2863198138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json new file mode 100644 index 000000000..b42aec033 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/1762652579.743664", + "retrieved_timestamp": "1762652579.743665", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5063586838477463 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45715808996720547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34237500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902260638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json new file mode 100644 index 000000000..b691d52b9 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/1762652579.744527", + "retrieved_timestamp": "1762652579.744527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47820671374176077 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4764157817799906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3397395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29429853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json new file mode 100644 index 000000000..9d2d80a8a --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-Chat-v0.1/1762652579.744951", + "retrieved_timestamp": "1762652579.744951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/MagpieLM-8B-Chat-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3700714105240761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4172338260055306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3500625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3194813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json new file mode 100644 index 000000000..f831d8c47 --- /dev/null +++ b/data/hfopenllm_v2/Magpie-Align/MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-SFT-v0.1/1762652579.7451751", + "retrieved_timestamp": "1762652579.7451751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/MagpieLM-8B-SFT-v0.1", + "developer": "Magpie-Align", + "inference_platform": "unknown", + "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4720619068515982 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45528501595553356 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2989527925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json b/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json new file mode 100644 index 000000000..ef26313d3 --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_ContentCuisine_1-7B-slerp/1762652579.745631", + "retrieved_timestamp": "1762652579.745632", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ManoloPueblo/ContentCuisine_1-7B-slerp", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "id": "ManoloPueblo/ContentCuisine_1-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3907044419916932 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188437309746964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46719791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30535239361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json new file mode 100644 index 000000000..6be3c15dd --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC2/1762652579.745891", + "retrieved_timestamp": "1762652579.745892", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ManoloPueblo/LLM_MERGE_CC2", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "id": "ManoloPueblo/LLM_MERGE_CC2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3853087585384557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5209367401710429 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45929166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30319148936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json new file mode 100644 index 000000000..992b2cf6f --- /dev/null +++ b/data/hfopenllm_v2/ManoloPueblo/LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC3/1762652579.7460978", + "retrieved_timestamp": "1762652579.746099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ManoloPueblo/LLM_MERGE_CC3", + "developer": "ManoloPueblo", + "inference_platform": "unknown", + "id": "ManoloPueblo/LLM_MERGE_CC3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3958751667797001 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5246290546274339 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4671666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3155751329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json b/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json new file mode 100644 index 000000000..e2e4d79ed --- /dev/null +++ b/data/hfopenllm_v2/MarinaraSpaghetti/NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_NemoReRemix-12B/1762652579.7463942", + "retrieved_timestamp": "1762652579.746399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MarinaraSpaghetti/NemoReRemix-12B", + "developer": "MarinaraSpaghetti", + "inference_platform": "unknown", + "id": "MarinaraSpaghetti/NemoReRemix-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33425089872649016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5536511805668158 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4501458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3597905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json b/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json new file mode 100644 index 000000000..e8a9387e0 --- /dev/null +++ b/data/hfopenllm_v2/MarinaraSpaghetti/Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_Nemomix-v4.0-12B/1762652579.746819", + "retrieved_timestamp": "1762652579.7468212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MarinaraSpaghetti/Nemomix-v4.0-12B", + "developer": "MarinaraSpaghetti", + "inference_platform": "unknown", + "id": "MarinaraSpaghetti/Nemomix-v4.0-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574664113441224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274986611124783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42444791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36128656914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json b/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json new file mode 100644 index 000000000..6838cf4e5 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/1762652579.747071", + "retrieved_timestamp": "1762652579.747073", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25484159807089635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3952730330493959 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40832291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22739361702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json b/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json new file mode 100644 index 000000000..af099c17c --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_general3B-ECE-PRYMMAL-Martial/1762652579.748109", + "retrieved_timestamp": "1762652579.74811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/general3B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27222658102722996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394350977017502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4700520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38763297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json b/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json new file mode 100644 index 000000000..95475411b --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/1762652579.748472", + "retrieved_timestamp": "1762652579.7484732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5692817280371636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5636569831901026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43960416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4498005319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json b/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json new file mode 100644 index 000000000..3891ec167 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/1762652579.7487411", + "retrieved_timestamp": "1762652579.7487419", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794961812435449 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42301343044108936 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38673958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2922207446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json b/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json new file mode 100644 index 000000000..e6da8c755 --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/1762652579.748992", + "retrieved_timestamp": "1762652579.748993", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33032908239028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5453325807578268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47246875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37666223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json b/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json new file mode 100644 index 000000000..d702a851f --- /dev/null +++ b/data/hfopenllm_v2/Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/1762652579.749232", + "retrieved_timestamp": "1762652579.749232", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", + "developer": "Marsouuu", + "inference_platform": "unknown", + "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5753267995585047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.562336014537904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36555891238670696 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45113031914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json new file mode 100644 index 000000000..a5fdd8645 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.1/1762652579.7495291", + "retrieved_timestamp": "1762652579.74953", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4315205875964663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5102819889174134 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4198854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3056848404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json new file mode 100644 index 000000000..ffcbc703f --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.2/1762652579.7498329", + "retrieved_timestamp": "1762652579.749834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429447200095746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110766802558263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43176041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30576795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json new file mode 100644 index 000000000..a88c15912 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1762652579.750048", + "retrieved_timestamp": "1762652579.750049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47143800671108216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366257615951637 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4617686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json new file mode 100644 index 000000000..8534dc344 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/1762652579.750272", + "retrieved_timestamp": "1762652579.750272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7667433520835827 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4924311866686311 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42143749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38622007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json new file mode 100644 index 000000000..4097bb53d --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/1762652579.750486", + "retrieved_timestamp": "1762652579.750487", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527549125209998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49627836815949883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42019791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3853058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json new file mode 100644 index 000000000..1e9bcef47 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1762652579.750697", + "retrieved_timestamp": "1762652579.750697", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.763046494412603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4936132794870085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3845578457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json new file mode 100644 index 000000000..c7d25bbd6 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/1762652579.7511811", + "retrieved_timestamp": "1762652579.751182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33522498082864577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123061019250074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44347916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3857214095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json new file mode 100644 index 000000000..81e179870 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/1762652579.751401", + "retrieved_timestamp": "1762652579.751402", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27747266142723526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4637108491317945 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4293125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566323138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json new file mode 100644 index 000000000..be46ff654 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-rys-78b/1762652579.752971", + "retrieved_timestamp": "1762652579.752971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-rys-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8135547015252862 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7097861139530462 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3942598187311178 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4693125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5443816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json new file mode 100644 index 000000000..1c8b2e3e8 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-rys-78b/1762652579.754511", + "retrieved_timestamp": "1762652579.754511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-rys-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7986420475449585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7081014602379213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070996978851964 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40687919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45356250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json new file mode 100644 index 000000000..228b775cd --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-rys-78b/1762652579.7562392", + "retrieved_timestamp": "1762652579.7562408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-rys-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8065854155862002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7107763314317289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40436241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45492708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5475398936170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json new file mode 100644 index 000000000..588ca075c --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-rys-78b/1762652579.7570088", + "retrieved_timestamp": "1762652579.75701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.4-rys-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.4-rys-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8010899967641414 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7279510956242796 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070996978851964 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40268456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5770624999999999 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7002160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json new file mode 100644 index 000000000..18bba3292 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-baguette-3b/1762652579.7580318", + "retrieved_timestamp": "1762652579.7580328", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.1-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.1-baguette-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6234369251364158 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46833341042911075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25604229607250756 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40079166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33992686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json new file mode 100644 index 000000000..f97759091 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-3b/1762652579.758249", + "retrieved_timestamp": "1762652579.75825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.1-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.1-instruct-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43359397509718656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4812730148043098 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39520833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.355718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json new file mode 100644 index 000000000..b70a6851d --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-78b/1762652579.7584739", + "retrieved_timestamp": "1762652579.758475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.1-instruct-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.1-instruct-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8135547015252862 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7305154498840408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39274924471299094 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5890624999999999 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.718500664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json new file mode 100644 index 000000000..4ef2faf8e --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-baguette-3b/1762652579.75889", + "retrieved_timestamp": "1762652579.758891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.2-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.2-baguette-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6338282423968404 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.470862269902714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40209374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3337765957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json new file mode 100644 index 000000000..854147fe0 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-3b/1762652579.759095", + "retrieved_timestamp": "1762652579.7590961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.2-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.2-instruct-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5533196363426819 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4865641110376735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40469791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36527593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json new file mode 100644 index 000000000..7e7dc4d5a --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-78b/1762652579.759298", + "retrieved_timestamp": "1762652579.759299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.2-instruct-78b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.2-instruct-78b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8062607215521482 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7318616272092674 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033232628398791 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40268456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6023645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7303025265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json new file mode 100644 index 000000000..7c43e9c6c --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-baguette-3b/1762652579.759511", + "retrieved_timestamp": "1762652579.759511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.3-baguette-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.3-baguette-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6359514975819713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4678217295957521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806646525679758 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39282291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341921542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json new file mode 100644 index 000000000..355fd2dd9 --- /dev/null +++ b/data/hfopenllm_v2/MaziyarPanahi/calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-instruct-3b/1762652579.759784", + "retrieved_timestamp": "1762652579.759785", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.3-instruct-3b", + "developer": "MaziyarPanahi", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.3-instruct-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6423212631373645 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46933409427688694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37386706948640486 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40742708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json b/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json new file mode 100644 index 000000000..503a40553 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-00/1762652579.7606468", + "retrieved_timestamp": "1762652579.76065", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minami-su/test-7B-00", + "developer": "Minami-su", + "inference_platform": "unknown", + "id": "Minami-su/test-7B-00" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6690492338107332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44661237656101793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41260416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3587932180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json b/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json new file mode 100644 index 000000000..02f3dec17 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-01/1762652579.761029", + "retrieved_timestamp": "1762652579.76103", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minami-su/test-7B-01", + "developer": "Minami-su", + "inference_platform": "unknown", + "id": "Minami-su/test-7B-01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6736204382150472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422359420239754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4554380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41530208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35355718085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json b/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json new file mode 100644 index 000000000..0d80d9c60 --- /dev/null +++ b/data/hfopenllm_v2/Minami-su/test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Minami-su_test-v2-7B-00/1762652579.76127", + "retrieved_timestamp": "1762652579.761271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minami-su/test-v2-7B-00", + "developer": "Minami-su", + "inference_platform": "unknown", + "id": "Minami-su/test-v2-7B-00" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6747197436136119 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4415989344595353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4418429003021148 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41542708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3472406914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json b/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json new file mode 100644 index 000000000..1880985b9 --- /dev/null +++ b/data/hfopenllm_v2/ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/1762652579.761516", + "retrieved_timestamp": "1762652579.761517", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", + "developer": "ModelCloud", + "inference_platform": "unknown", + "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5268919799465418 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3252726665015006 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17644614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.453 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json b/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json new file mode 100644 index 000000000..53b181276 --- /dev/null +++ b/data/hfopenllm_v2/Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/1762652579.7624152", + "retrieved_timestamp": "1762652579.7624161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", + "developer": "Mostafa8Mehrabi", + "inference_platform": "unknown", + "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13206735905176042 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3003508901818665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json new file mode 100644 index 000000000..4471be4e3 --- /dev/null +++ b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLong-8b-v4i/1762652579.762677", + "retrieved_timestamp": "1762652579.762678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", + "developer": "MrRobotoAI", + "inference_platform": "unknown", + "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3834603297029659 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.458548650453507 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3068484042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json new file mode 100644 index 000000000..387ae7f26 --- /dev/null +++ b/data/hfopenllm_v2/MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/1762652579.762937", + "retrieved_timestamp": "1762652579.762937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", + "developer": "MrRobotoAI", + "inference_platform": "unknown", + "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34754008253655855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515254903058233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42788541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2565658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json new file mode 100644 index 000000000..01db929de --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1211-3B/1762652579.763158", + "retrieved_timestamp": "1762652579.763159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultivexAI/Gladiator-Mini-Exp-1211-3B", + "developer": "MultivexAI", + "inference_platform": "unknown", + "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.68760887777763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44843752663028075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.326 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3151595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json new file mode 100644 index 000000000..eb54c4301 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/1762652579.763629", + "retrieved_timestamp": "1762652579.7636302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", + "developer": "MultivexAI", + "inference_platform": "unknown", + "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6215386286165153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438883390990549 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30082291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3025265957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json new file mode 100644 index 000000000..992d58c1a --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/1762652579.763424", + "retrieved_timestamp": "1762652579.763425", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", + "developer": "MultivexAI", + "inference_platform": "unknown", + "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6078748830879843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369766992416903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31145833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3048537234042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json new file mode 100644 index 000000000..d19d47b6e --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/1762652579.763836", + "retrieved_timestamp": "1762652579.7638369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", + "developer": "MultivexAI", + "inference_platform": "unknown", + "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6163180361440976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4373182371021645 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30169547872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json b/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json new file mode 100644 index 000000000..7d4536459 --- /dev/null +++ b/data/hfopenllm_v2/MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/1762652579.764051", + "retrieved_timestamp": "1762652579.764052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", + "developer": "MultivexAI", + "inference_platform": "unknown", + "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14398241111362298 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29077474506950557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3641979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11087101063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json new file mode 100644 index 000000000..c685b5050 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1.1/1762652579.764531", + "retrieved_timestamp": "1762652579.764531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mxode/NanoLM-0.3B-Instruct-v1.1", + "developer": "Mxode", + "inference_platform": "unknown", + "id": "Mxode/NanoLM-0.3B-Instruct-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17827918810977095 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3014403673764691 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42733333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11211768617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json new file mode 100644 index 000000000..5aece4530 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1/1762652579.764268", + "retrieved_timestamp": "1762652579.764269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mxode/NanoLM-0.3B-Instruct-v1", + "developer": "Mxode", + "inference_platform": "unknown", + "id": "Mxode/NanoLM-0.3B-Instruct-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1536744726215331 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30282462164767127 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41552083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11053856382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json new file mode 100644 index 000000000..49297ded9 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v2/1762652579.7647529", + "retrieved_timestamp": "1762652579.7647538", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mxode/NanoLM-0.3B-Instruct-v2", + "developer": "Mxode", + "inference_platform": "unknown", + "id": "Mxode/NanoLM-0.3B-Instruct-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1667885654507817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29211039456850646 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3954583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11344747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.315 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json new file mode 100644 index 000000000..1153a7070 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v1.1/1762652579.764964", + "retrieved_timestamp": "1762652579.764964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mxode/NanoLM-1B-Instruct-v1.1", + "developer": "Mxode", + "inference_platform": "unknown", + "id": "Mxode/NanoLM-1B-Instruct-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23952889444451833 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31835012059590373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34327083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12150930851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.076 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json new file mode 100644 index 000000000..f3f4936d8 --- /dev/null +++ b/data/hfopenllm_v2/Mxode/NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v2/1762652579.765177", + "retrieved_timestamp": "1762652579.7651782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Mxode/NanoLM-1B-Instruct-v2", + "developer": "Mxode", + "inference_platform": "unknown", + "id": "Mxode/NanoLM-1B-Instruct-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2629844368497808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3123145400715591 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35520833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12375332446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.076 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json new file mode 100644 index 000000000..004ae590c --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/1762652579.765912", + "retrieved_timestamp": "1762652579.765913", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390818583580456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4900525115527062 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903323262839879 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37870833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33984375 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json new file mode 100644 index 000000000..d1e3b9ade --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/1762652579.766172", + "retrieved_timestamp": "1762652579.766173", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7344202272193336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4861833360906734 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4421145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474900265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json new file mode 100644 index 000000000..8cca74cdf --- /dev/null +++ b/data/hfopenllm_v2/NAPS-ai/naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/1762652579.766381", + "retrieved_timestamp": "1762652579.766382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", + "developer": "NAPS-ai", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5020124381086628 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4147584365689691 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37127083333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26138630319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json b/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json new file mode 100644 index 000000000..80f4e3594 --- /dev/null +++ b/data/hfopenllm_v2/NCSOFT/Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NCSOFT_Llama-VARCO-8B-Instruct/1762652579.767406", + "retrieved_timestamp": "1762652579.7674072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NCSOFT/Llama-VARCO-8B-Instruct", + "developer": "NCSOFT", + "inference_platform": "unknown", + "id": "NCSOFT/Llama-VARCO-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4470327619604871 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5022879316026018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3840729166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31898271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json b/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json new file mode 100644 index 000000000..ce7a68af8 --- /dev/null +++ b/data/hfopenllm_v2/NJS26/NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NJS26_NJS_777/1762652579.76769", + "retrieved_timestamp": "1762652579.76769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NJS26/NJS_777", + "developer": "NJS26", + "inference_platform": "unknown", + "id": "NJS26/NJS_777" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18809647291409015 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21782097894078087 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2063758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11627327127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json b/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json new file mode 100644 index 000000000..b2e367916 --- /dev/null +++ b/data/hfopenllm_v2/NLPark/AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NLPark_AnFeng_v3.1-Avocet/1762652579.76799", + "retrieved_timestamp": "1762652579.767991", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NLPark/AnFeng_v3.1-Avocet", + "developer": "NLPark", + "inference_platform": "unknown", + "id": "NLPark/AnFeng_v3.1-Avocet" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5096311121158525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.582852329074409 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44757291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44381648936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.393 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json b/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json new file mode 100644 index 000000000..962e28d6f --- /dev/null +++ b/data/hfopenllm_v2/NLPark/B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NLPark_B-and-W_Flycatcher-3AD1E/1762652579.7682638", + "retrieved_timestamp": "1762652579.768265", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NLPark/B-and-W_Flycatcher-3AD1E", + "developer": "NLPark", + "inference_platform": "unknown", + "id": "NLPark/B-and-W_Flycatcher-3AD1E" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49084650948372543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6065117528534355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23791540785498488 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44227083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4740691489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json b/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json new file mode 100644 index 000000000..0863d4fe1 --- /dev/null +++ b/data/hfopenllm_v2/NLPark/Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NLPark_Shi-Ci-Robin-Test_3AD80/1762652579.768489", + "retrieved_timestamp": "1762652579.76849", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NLPark/Shi-Ci-Robin-Test_3AD80", + "developer": "NLPark", + "inference_platform": "unknown", + "id": "NLPark/Shi-Ci-Robin-Test_3AD80" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7226547782107031 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6704805157570325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3598993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46959375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5120511968085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json b/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json new file mode 100644 index 000000000..421e966bd --- /dev/null +++ b/data/hfopenllm_v2/NTQAI/NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NTQAI_NxMobileLM-1.5B-SFT/1762652579.768717", + "retrieved_timestamp": "1762652579.768718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NTQAI/NxMobileLM-1.5B-SFT", + "developer": "NTQAI", + "inference_platform": "unknown", + "id": "NTQAI/NxMobileLM-1.5B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6392239258500778 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39571778048116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35552083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28174867021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json b/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json new file mode 100644 index 000000000..10c0b2dae --- /dev/null +++ b/data/hfopenllm_v2/NTQAI/Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NTQAI_Nxcode-CQ-7B-orpo/1762652579.769034", + "retrieved_timestamp": "1762652579.769035", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NTQAI/Nxcode-CQ-7B-orpo", + "developer": "NTQAI", + "inference_platform": "unknown", + "id": "NTQAI/Nxcode-CQ-7B-orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40072119753365515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143023249178217 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39396875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16115359042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.25 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json b/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json new file mode 100644 index 000000000..b2d6ee097 --- /dev/null +++ b/data/hfopenllm_v2/NYTK/PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NYTK_PULI-LlumiX-32K/1762652579.76952", + "retrieved_timestamp": "1762652579.769521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NYTK/PULI-LlumiX-32K", + "developer": "NYTK", + "inference_platform": "unknown", + "id": "NYTK/PULI-LlumiX-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1699612583500667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31893582242949375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39641666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16805186170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json new file mode 100644 index 000000000..229512a37 --- /dev/null +++ b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-Instruct/1762652579.7700322", + "retrieved_timestamp": "1762652579.770033", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NbAiLab/nb-llama-3.1-8B-Instruct", + "developer": "NbAiLab", + "inference_platform": "unknown", + "id": "NbAiLab/nb-llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.362502604201297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466553135589526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32076041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1196808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json new file mode 100644 index 000000000..e7f8f963c --- /dev/null +++ b/data/hfopenllm_v2/NbAiLab/nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-sft/1762652579.7703218", + "retrieved_timestamp": "1762652579.770323", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NbAiLab/nb-llama-3.1-8B-sft", + "developer": "NbAiLab", + "inference_platform": "unknown", + "id": "NbAiLab/nb-llama-3.1-8B-sft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36157838978355206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3281509048328078 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3287291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12217420212765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json new file mode 100644 index 000000000..2ba1a11a4 --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-french-DPO/1762652579.770777", + "retrieved_timestamp": "1762652579.7707782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nekochu/Llama-3.1-8B-french-DPO", + "developer": "Nekochu", + "inference_platform": "unknown", + "id": "Nekochu/Llama-3.1-8B-french-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46564227361179444 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110888403999433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414228723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json b/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json new file mode 100644 index 000000000..c92d93d96 --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-13B-v3/1762652579.771023", + "retrieved_timestamp": "1762652579.771023", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nekochu/Luminia-13B-v3", + "developer": "Nekochu", + "inference_platform": "unknown", + "id": "Nekochu/Luminia-13B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25231829323971505 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41121515510929624 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3983333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22149268617021275 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json b/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json new file mode 100644 index 000000000..9a50f5bfa --- /dev/null +++ b/data/hfopenllm_v2/Nekochu/Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-8B-RP/1762652579.7713962", + "retrieved_timestamp": "1762652579.7713978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nekochu/Luminia-8B-RP", + "developer": "Nekochu", + "inference_platform": "unknown", + "id": "Nekochu/Luminia-8B-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574165436597118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5218151030627874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13595166163141995 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3997604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3631150265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json new file mode 100644 index 000000000..b9bdbd621 --- /dev/null +++ b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-12B/1762652579.771668", + "retrieved_timestamp": "1762652579.771669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeverSleep/Lumimaid-v0.2-12B", + "developer": "NeverSleep", + "inference_platform": "unknown", + "id": "NeverSleep/Lumimaid-v0.2-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10993497253952846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395610525850818 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48211458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3511469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json new file mode 100644 index 000000000..95f35860e --- /dev/null +++ b/data/hfopenllm_v2/NeverSleep/Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-8B/1762652579.771939", + "retrieved_timestamp": "1762652579.771939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NeverSleep/Lumimaid-v0.2-8B", + "developer": "NeverSleep", + "inference_platform": "unknown", + "id": "NeverSleep/Lumimaid-v0.2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5038109992597419 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5237767601226618 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36361369680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json new file mode 100644 index 000000000..9115ad10e --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_Halo_0.1/1762652579.78175", + "retrieved_timestamp": "1762652579.7817512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Nemotron_W_4b_Halo_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "id": "Nexesenex/Nemotron_W_4b_Halo_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3627275628665275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4135101667655742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28020134228187926 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41651041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25049867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json new file mode 100644 index 000000000..1308d9129 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_MagLight_0.1/1762652579.781992", + "retrieved_timestamp": "1762652579.781993", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Nemotron_W_4b_MagLight_0.1", + "developer": "Nexesenex", + "inference_platform": "unknown", + "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230275668559422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42314083807225433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41120833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2544880319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.513 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json b/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json new file mode 100644 index 000000000..35d7bd691 --- /dev/null +++ b/data/hfopenllm_v2/Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/1762652579.7826922", + "retrieved_timestamp": "1762652579.7826939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", + "developer": "Nexesenex", + "inference_platform": "unknown", + "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5889905450870357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3562492190965966 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728095 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33955208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1802692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json b/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json new file mode 100644 index 000000000..156c68fee --- /dev/null +++ b/data/hfopenllm_v2/Nexusflow/NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexusflow_NexusRaven-V2-13B/1762652579.782948", + "retrieved_timestamp": "1762652579.7829492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexusflow/NexusRaven-V2-13B", + "developer": "Nexusflow", + "inference_platform": "unknown", + "id": "Nexusflow/NexusRaven-V2-13B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1790781792311068 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39488604640507335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3736875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18716755319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json new file mode 100644 index 000000000..b6f521bc6 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/1762652579.783191", + "retrieved_timestamp": "1762652579.7831922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2848918646967823 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.426284784119477 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39251041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23761635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.791 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json new file mode 100644 index 000000000..b3b9181ee --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/1762652579.783446", + "retrieved_timestamp": "1762652579.783447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18080249294095221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28148007801214714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37495833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11427859042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.928 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json b/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json new file mode 100644 index 000000000..06b5c1c79 --- /dev/null +++ b/data/hfopenllm_v2/NikolaSigmoid/acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_acemath-200/1762652579.783974", + "retrieved_timestamp": "1762652579.783974", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/acemath-200", + "developer": "NikolaSigmoid", + "inference_platform": "unknown", + "id": "NikolaSigmoid/acemath-200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2848918646967823 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.426284784119477 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39251041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23761635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.791 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json new file mode 100644 index 000000000..551aeb0c7 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/1762652579.784868", + "retrieved_timestamp": "1762652579.7848692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6312805578088361 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5078530730075063 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359624335106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json new file mode 100644 index 000000000..8f66107eb --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_BMO-Violent-12B/1762652579.785123", + "retrieved_timestamp": "1762652579.785124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Captain-Eris_BMO-Violent-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.615218730745533 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5104372825851065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42553124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35713098404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json new file mode 100644 index 000000000..08f53d583 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/1762652579.785343", + "retrieved_timestamp": "1762652579.785344", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6261597007052399 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515921407165298 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42791666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347406914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json new file mode 100644 index 000000000..f6777e5b9 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-V0.420-12B/1762652579.785556", + "retrieved_timestamp": "1762652579.785557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43391866913123844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5478099417611365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43306249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3722573138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json b/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json new file mode 100644 index 000000000..33861e469 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain_BMO-12B/1762652579.7857668", + "retrieved_timestamp": "1762652579.7857668", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Captain_BMO-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Captain_BMO-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4750595087700634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5285960650424973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37480208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3568816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json new file mode 100644 index 000000000..e3b1d6413 --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Stable-v0.2-L3-8B/1762652579.7859662", + "retrieved_timestamp": "1762652579.785967", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7174840534226963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5285819178301682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3780625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36959773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json b/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json new file mode 100644 index 000000000..74b9a7bde --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/1762652579.786179", + "retrieved_timestamp": "1762652579.78618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7110145524984818 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5279036861109899 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37200797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json b/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json new file mode 100644 index 000000000..a5243d0dc --- /dev/null +++ b/data/hfopenllm_v2/Nitral-AI/Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nitral-AI_Nera_Noctis-12B/1762652579.786392", + "retrieved_timestamp": "1762652579.7863932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nitral-AI/Nera_Noctis-12B", + "developer": "Nitral-AI", + "inference_platform": "unknown", + "id": "Nitral-AI/Nera_Noctis-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45617517076911485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193675192746302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39790624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3468251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json new file mode 100644 index 000000000..067c99d8b --- /dev/null +++ b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.1/1762652579.786606", + "retrieved_timestamp": "1762652579.786607", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nohobby/MS-Schisandra-22B-v0.1", + "developer": "Nohobby", + "inference_platform": "unknown", + "id": "Nohobby/MS-Schisandra-22B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6331289866443259 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5789949714896523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39284375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095744680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json new file mode 100644 index 000000000..ff6333620 --- /dev/null +++ b/data/hfopenllm_v2/Nohobby/MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.2/1762652579.78686", + "retrieved_timestamp": "1762652579.786861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nohobby/MS-Schisandra-22B-v0.2", + "developer": "Nohobby", + "inference_platform": "unknown", + "id": "Nohobby/MS-Schisandra-22B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6382997114323329 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5841215984231857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40747916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4136469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json b/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json new file mode 100644 index 000000000..42cae6450 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Alpha/1762652579.787071", + "retrieved_timestamp": "1762652579.787072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Alpha", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802951723648808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3373652507108038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36308333333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30028257978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json b/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json new file mode 100644 index 000000000..a1f7c4934 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Bravo/1762652579.787321", + "retrieved_timestamp": "1762652579.787322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Bravo", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Bravo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3024519386339357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3558431980261287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38686458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.312749335106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json b/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json new file mode 100644 index 000000000..92c8e05aa --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Charlie/1762652579.787528", + "retrieved_timestamp": "1762652579.787528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Charlie", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Charlie" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3060989286205047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3515288346438244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3736875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30925864361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json b/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json new file mode 100644 index 000000000..b9f2540c5 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Delta/1762652579.78773", + "retrieved_timestamp": "1762652579.787731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Delta", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Delta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.253842028041153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434783285415976 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2958776595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json b/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json new file mode 100644 index 000000000..ce746cafe --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Echo/1762652579.787929", + "retrieved_timestamp": "1762652579.787929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Echo", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Echo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31579099012841483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35304654390055795 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30950797872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json b/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json new file mode 100644 index 000000000..19ac8fdf5 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Foxtrot/1762652579.788121", + "retrieved_timestamp": "1762652579.788121", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Foxtrot", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Foxtrot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011531624977283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3558026577191667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30501994680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json b/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json new file mode 100644 index 000000000..3bd2bad02 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Golf/1762652579.788314", + "retrieved_timestamp": "1762652579.7883148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Golf", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Golf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3533601953926692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35332648991705207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30560172872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json b/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json new file mode 100644 index 000000000..e913776d4 --- /dev/null +++ b/data/hfopenllm_v2/Norquinal/Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Norquinal_Hotel/1762652579.788509", + "retrieved_timestamp": "1762652579.7885098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Norquinal/Hotel", + "developer": "Norquinal", + "inference_platform": "unknown", + "id": "Norquinal/Hotel" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3215113676157041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36785702492059275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156582446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json new file mode 100644 index 000000000..f5cd822c3 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/1762652579.7912042", + "retrieved_timestamp": "1762652579.7912052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5762510139762497 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48526536654652347 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099697 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3999791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3015292553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json new file mode 100644 index 000000000..47c9b7786 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/1762652579.791439", + "retrieved_timestamp": "1762652579.7914398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5896898008395501 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5538851384033822 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4595416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3666057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json new file mode 100644 index 000000000..db5eed83d --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/1762652579.791643", + "retrieved_timestamp": "1762652579.7916439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5730783210769648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057868454026635 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30659906914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json new file mode 100644 index 000000000..63613e92e --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-SOLAR-10.7B/1762652579.791853", + "retrieved_timestamp": "1762652579.7918541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278660620486975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414294841140173 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3458277925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json new file mode 100644 index 000000000..fdf37d024 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-32k/1762652579.793437", + "retrieved_timestamp": "1762652579.793438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Solar-10b-32k", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Solar-10b-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19421579187666504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4986859152325069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4146458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32721077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json new file mode 100644 index 000000000..c57b84d30 --- /dev/null +++ b/data/hfopenllm_v2/NousResearch/Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-64k/1762652579.793644", + "retrieved_timestamp": "1762652579.7936451", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Solar-10b-64k", + "developer": "NousResearch", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Solar-10b-64k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1988867316498003 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49219907954226505 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40143750000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3148271276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json b/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json new file mode 100644 index 000000000..c140c09c9 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_ASTAROTH-3.2-1B/1762652579.7938519", + "retrieved_timestamp": "1762652579.793853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/ASTAROTH-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/ASTAROTH-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5612884923115112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542962056805596 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19090757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json b/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json new file mode 100644 index 000000000..589a169eb --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_BLAST_PROCESSING-3.2-1B/1762652579.794129", + "retrieved_timestamp": "1762652579.7941298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/BLAST_PROCESSING-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/BLAST_PROCESSING-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921783091087204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460318843168258 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19414893617021275 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json b/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json new file mode 100644 index 000000000..6872e2b81 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_Cerberus-3.2-1B/1762652579.7945569", + "retrieved_timestamp": "1762652579.794559", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/Cerberus-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/Cerberus-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5016877440746109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4164937678626939 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32888541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1663065159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json b/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json new file mode 100644 index 000000000..d05e7d759 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_Cultist-3.2-1B/1762652579.7949288", + "retrieved_timestamp": "1762652579.79493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/Cultist-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/Cultist-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294895322189568 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3399311286410264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3330104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17137632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json b/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json new file mode 100644 index 000000000..db9cccd9e --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/1762652579.795153", + "retrieved_timestamp": "1762652579.795153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.559814625194484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3487816706572648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33288541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17345412234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json b/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json new file mode 100644 index 000000000..5295e689b --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/1762652579.795362", + "retrieved_timestamp": "1762652579.795362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366339091388627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434595088038714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3209166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17578125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json b/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json new file mode 100644 index 000000000..4fd9b1235 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_HarmfulProject-3.2-1B/1762652579.7958348", + "retrieved_timestamp": "1762652579.795836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/HarmfulProject-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/HarmfulProject-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873821460391761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32744993658117816 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.341875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18226396276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json b/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json new file mode 100644 index 000000000..c61fb8125 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_LEWD-Mental-Cultist-3.2-1B/1762652579.796045", + "retrieved_timestamp": "1762652579.796046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/LEWD-Mental-Cultist-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5308636639671627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35127188813594756 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32228125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1768617021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json b/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json new file mode 100644 index 000000000..18eb35e54 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_La_Mejor_Mezcla-3.2-1B/1762652579.79625", + "retrieved_timestamp": "1762652579.7962508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/La_Mejor_Mezcla-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/La_Mejor_Mezcla-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5509969104199081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34879364478381225 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18292885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json b/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json new file mode 100644 index 000000000..632aaafe6 --- /dev/null +++ b/data/hfopenllm_v2/Novaciano/Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_Sigil-Of-Satan-3.2-1B/1762652579.7964501", + "retrieved_timestamp": "1762652579.7964501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/Sigil-Of-Satan-3.2-1B", + "developer": "Novaciano", + "inference_platform": "unknown", + "id": "Novaciano/Sigil-Of-Satan-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5494233079340594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3545862332731657 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3276145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json b/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json new file mode 100644 index 000000000..6c331a0c7 --- /dev/null +++ b/data/hfopenllm_v2/NucleusAI/nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NucleusAI_nucleus-22B-token-500B/1762652579.7966561", + "retrieved_timestamp": "1762652579.7966561", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NucleusAI/nucleus-22B-token-500B", + "developer": "NucleusAI", + "inference_platform": "unknown", + "id": "NucleusAI/nucleus-22B-token-500B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.025654153202391873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29198007801214715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3510520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11619015957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.828 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json b/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json new file mode 100644 index 000000000..e81c64c73 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-15B/1762652579.797408", + "retrieved_timestamp": "1762652579.797409", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OEvortex/HelpingAI-15B", + "developer": "OEvortex", + "inference_platform": "unknown", + "id": "OEvortex/HelpingAI-15B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2030091268944179 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936006977853758 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.361875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.323 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json b/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json new file mode 100644 index 000000000..4e5348434 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-3B-reloaded/1762652579.797647", + "retrieved_timestamp": "1762652579.797647", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OEvortex/HelpingAI-3B-reloaded", + "developer": "OEvortex", + "inference_platform": "unknown", + "id": "OEvortex/HelpingAI-3B-reloaded" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46466819150963884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4128512897904065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3524479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25947473404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json b/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json new file mode 100644 index 000000000..74afc67f4 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2-9B/1762652579.797843", + "retrieved_timestamp": "1762652579.797844", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OEvortex/HelpingAI2-9B", + "developer": "OEvortex", + "inference_platform": "unknown", + "id": "OEvortex/HelpingAI2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44131238447319776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4844617641983123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3710833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28997672872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.903 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json b/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json new file mode 100644 index 000000000..1cffa2224 --- /dev/null +++ b/data/hfopenllm_v2/OEvortex/HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2.5-10B/1762652579.798051", + "retrieved_timestamp": "1762652579.798051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OEvortex/HelpingAI2.5-10B", + "developer": "OEvortex", + "inference_platform": "unknown", + "id": "OEvortex/HelpingAI2.5-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32765617450586665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4495657491171711 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37381250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25748005319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json b/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json new file mode 100644 index 000000000..ac43c436e --- /dev/null +++ b/data/hfopenllm_v2/OliveiraJLT/Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OliveiraJLT_Sagui-7B-Instruct-v0.1/1762652579.798249", + "retrieved_timestamp": "1762652579.798249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OliveiraJLT/Sagui-7B-Instruct-v0.1", + "developer": "OliveiraJLT", + "inference_platform": "unknown", + "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28916275482386733 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3110678914743868 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4190520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14852061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json b/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json new file mode 100644 index 000000000..7209bd367 --- /dev/null +++ b/data/hfopenllm_v2/Omkar1102/code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.79849", + "retrieved_timestamp": "1762652579.7984908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Omkar1102/code-yi", + "developer": "Omkar1102", + "inference_platform": "unknown", + "id": "Omkar1102/code-yi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21477457590304835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2760062695877461 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3802291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.084 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json b/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json new file mode 100644 index 000000000..e86f6a435 --- /dev/null +++ b/data/hfopenllm_v2/Omkar1102/code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.798722", + "retrieved_timestamp": "1762652579.798723", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Omkar1102/code-yi", + "developer": "Omkar1102", + "inference_platform": "unknown", + "id": "Omkar1102/code-yi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2254407195131141 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2750025242693941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3761979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.084 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json b/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json new file mode 100644 index 000000000..c0fa68afa --- /dev/null +++ b/data/hfopenllm_v2/OmnicromsBrain/NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/1762652579.7988968", + "retrieved_timestamp": "1762652579.798898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", + "developer": "OmnicromsBrain", + "inference_platform": "unknown", + "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5963842604289951 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47762434766958123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2605551861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json b/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json new file mode 100644 index 000000000..1d25216ee --- /dev/null +++ b/data/hfopenllm_v2/OnlyCheeini/greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OnlyCheeini_greesychat-turbo/1762652579.7991328", + "retrieved_timestamp": "1762652579.799134", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OnlyCheeini/greesychat-turbo", + "developer": "OnlyCheeini", + "inference_platform": "unknown", + "id": "OnlyCheeini/greesychat-turbo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023256071667619692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30921339082318816 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3314270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11377992021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json b/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json new file mode 100644 index 000000000..d7f561ee3 --- /dev/null +++ b/data/hfopenllm_v2/OpenAssistant/oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenAssistant_oasst-sft-1-pythia-12b/1762652579.799746", + "retrieved_timestamp": "1762652579.799747", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenAssistant/oasst-sft-1-pythia-12b", + "developer": "OpenAssistant", + "inference_platform": "unknown", + "id": "OpenAssistant/oasst-sft-1-pythia-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10553885911603435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.314662875941371 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33269791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11128656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json new file mode 100644 index 000000000..887166c82 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/1762652579.800029", + "retrieved_timestamp": "1762652579.80003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5086315420861093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6003725722032135 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41864583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833942819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.34 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json new file mode 100644 index 000000000..6a881da48 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/1762652579.803262", + "retrieved_timestamp": "1762652579.803263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.549347952322061 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46561770563515265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3830520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38040226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.741 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json new file mode 100644 index 000000000..304c931c9 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/1762652579.803536", + "retrieved_timestamp": "1762652579.803537", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7555275557742346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6749472828128272 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32099697885196377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45375000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5174534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json new file mode 100644 index 000000000..62840bf59 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/1762652579.803802", + "retrieved_timestamp": "1762652579.803806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7226547782107031 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6704805157570325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3598993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46959375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5120511968085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json new file mode 100644 index 000000000..0ce31864b --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/1762652579.804893", + "retrieved_timestamp": "1762652579.804894", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.593661484860171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6798496773637743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37386706948640486 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.484875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5490359042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json new file mode 100644 index 000000000..b7a7e5769 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/1762652579.8051221", + "retrieved_timestamp": "1762652579.8051221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5969837808126881 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6771537576509328 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776435045317221 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47179166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446309840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json new file mode 100644 index 000000000..8374c4fcf --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/1762652579.8053398", + "retrieved_timestamp": "1762652579.805341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420041046645123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6162574860411373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44394791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599401595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.407 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json new file mode 100644 index 000000000..532f084c4 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-14b-v22.3-32k/1762652579.805548", + "retrieved_timestamp": "1762652579.8055491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37529200299649373 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4859759816473639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41660416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.022 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json new file mode 100644 index 000000000..22cd165be --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-3b-v21.2-32k/1762652579.8057752", + "retrieved_timestamp": "1762652579.8057752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3802377691192702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934791831798414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20337433510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.769 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json new file mode 100644 index 000000000..2e4286ec7 --- /dev/null +++ b/data/hfopenllm_v2/OpenBuddy/openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-56b-v21.2-32k/1762652579.8059928", + "retrieved_timestamp": "1762652579.805994", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", + "developer": "OpenBuddy", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057092957796425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6128345897750148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16238670694864046 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4305208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43991023936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 56.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json new file mode 100644 index 000000000..9428cbae5 --- /dev/null +++ b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost-14B/1762652579.806474", + "retrieved_timestamp": "1762652579.806475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenGenerativeAI/Bifrost-14B", + "developer": "OpenGenerativeAI", + "inference_platform": "unknown", + "id": "OpenGenerativeAI/Bifrost-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6615302951723648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6844897889249308 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23564954682779457 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46239583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5073969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json new file mode 100644 index 000000000..3ec1da608 --- /dev/null +++ b/data/hfopenllm_v2/OpenGenerativeAI/Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost/1762652579.8062131", + "retrieved_timestamp": "1762652579.8062139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenGenerativeAI/Bifrost", + "developer": "OpenGenerativeAI", + "inference_platform": "unknown", + "id": "OpenGenerativeAI/Bifrost" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6347524568145853 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6849273974523276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45976041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5159574468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json new file mode 100644 index 000000000..6770c161d --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-human-data/1762652579.8072178", + "retrieved_timestamp": "1762652579.807219", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLLM-France/Lucie-7B-Instruct-human-data", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "id": "OpenLLM-France/Lucie-7B-Instruct-human-data" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29460830596151544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32842533479733 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37285416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14295212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json new file mode 100644 index 000000000..e185a959f --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-v1.1/1762652579.807442", + "retrieved_timestamp": "1762652579.807442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLLM-France/Lucie-7B-Instruct-v1.1", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3038759380665523 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38158765227444885 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37502083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1864195478723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json new file mode 100644 index 000000000..ad2f63b72 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct/1762652579.806944", + "retrieved_timestamp": "1762652579.806945", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLLM-France/Lucie-7B-Instruct", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "id": "OpenLLM-France/Lucie-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.279645784296777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3254036581260458 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15558510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json new file mode 100644 index 000000000..15a48f9e0 --- /dev/null +++ b/data/hfopenllm_v2/OpenLLM-France/Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B/1762652579.806693", + "retrieved_timestamp": "1762652579.8066938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLLM-France/Lucie-7B", + "developer": "OpenLLM-France", + "inference_platform": "unknown", + "id": "OpenLLM-France/Lucie-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24964538535530173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3492469872973046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39232291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14976728723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json b/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json new file mode 100644 index 000000000..88d90a830 --- /dev/null +++ b/data/hfopenllm_v2/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/1762652579.808624", + "retrieved_timestamp": "1762652579.808625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", + "developer": "Orion-zhen", + "inference_platform": "unknown", + "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7204317876567508 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5473918652157296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773413897280967 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43613541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4426529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json b/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json new file mode 100644 index 000000000..6df3f43b9 --- /dev/null +++ b/data/hfopenllm_v2/P0x0/Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/P0x0_Astra-v1-12B/1762652579.8091059", + "retrieved_timestamp": "1762652579.8091059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "P0x0/Astra-v1-12B", + "developer": "P0x0", + "inference_platform": "unknown", + "id": "P0x0/Astra-v1-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28059437847134494 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214506484138984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4051875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460771276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json b/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json new file mode 100644 index 000000000..60be40392 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/1762652579.8095942", + "retrieved_timestamp": "1762652579.8095949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46276989498973836 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33018063718974094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14827127659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json new file mode 100644 index 000000000..bf7148a99 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/1762652579.809847", + "retrieved_timestamp": "1762652579.809848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7871015572015585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5073267838961463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3869895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767453457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.047 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json new file mode 100644 index 000000000..4e906300c --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1762652579.8105159", + "retrieved_timestamp": "1762652579.810517", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.693054428915278 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4556166737589294 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37003125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.312749335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json new file mode 100644 index 000000000..df463114b --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/1762652579.810729", + "retrieved_timestamp": "1762652579.81073", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6291573026237051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45814952191015346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.365875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3115026595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json new file mode 100644 index 000000000..8461c6822 --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/1762652579.8109388", + "retrieved_timestamp": "1762652579.8109398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6503898544750152 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45107942950222196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3687291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3107546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json new file mode 100644 index 000000000..22cc4533d --- /dev/null +++ b/data/hfopenllm_v2/PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/1762652579.8111491", + "retrieved_timestamp": "1762652579.81115", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", + "developer": "PJMixers-Dev", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5040858256093831 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4483158594793648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3515520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308344414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json b/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json new file mode 100644 index 000000000..f200d621a --- /dev/null +++ b/data/hfopenllm_v2/Parissa3/test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Parissa3_test-model/1762652579.811859", + "retrieved_timestamp": "1762652579.81186", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Parissa3/test-model", + "developer": "Parissa3", + "inference_platform": "unknown", + "id": "Parissa3/test-model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882564927725103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193916761801759 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46853125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3056848404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json b/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json new file mode 100644 index 000000000..6b1077cfc --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/1762652579.812447", + "retrieved_timestamp": "1762652579.812449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", + "developer": "Pinkstack", + "inference_platform": "unknown", + "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.051457909458015844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6719989821162488 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4199395770392749 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4913541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526845079787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json new file mode 100644 index 000000000..429cf3f9a --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/1762652579.81273", + "retrieved_timestamp": "1762652579.81273", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", + "developer": "Pinkstack", + "inference_platform": "unknown", + "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0375193375798437 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434736647957908 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33539583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18508976063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.812 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json new file mode 100644 index 000000000..853df06e6 --- /dev/null +++ b/data/hfopenllm_v2/Pinkstack/Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-v1/1762652579.812961", + "retrieved_timestamp": "1762652579.812962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pinkstack/Superthoughts-lite-v1", + "developer": "Pinkstack", + "inference_platform": "unknown", + "id": "Pinkstack/Superthoughts-lite-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1658643510330368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3465571905256149 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3671770833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17553191489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json b/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json new file mode 100644 index 000000000..05ae87b10 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-Instruct-CoreCurriculum-12b/1762652579.81328", + "retrieved_timestamp": "1762652579.813282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21914520139895477 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3788739075240266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1219248670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json new file mode 100644 index 000000000..c5296ef42 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/1762652579.813654", + "retrieved_timestamp": "1762652579.8136551", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7074672978807343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361046243199591 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45867708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json new file mode 100644 index 000000000..5a9b6f084 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/1762652579.813962", + "retrieved_timestamp": "1762652579.813962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7886252920029965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6421213844206719 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24546827794561935 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42996875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5025764627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json new file mode 100644 index 000000000..ae08b2e86 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/1762652579.814201", + "retrieved_timestamp": "1762652579.814202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.498190357141274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47325544259149366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3065159574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json b/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json new file mode 100644 index 000000000..529f1f567 --- /dev/null +++ b/data/hfopenllm_v2/PocketDoc/Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/1762652579.81442", + "retrieved_timestamp": "1762652579.81442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", + "developer": "PocketDoc", + "inference_platform": "unknown", + "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6520133246452745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405357251132225 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47452083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35596742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json b/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json new file mode 100644 index 000000000..d5bbfefc0 --- /dev/null +++ b/data/hfopenllm_v2/PowerInfer/SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PowerInfer_SmallThinker-3B-Preview/1762652579.814635", + "retrieved_timestamp": "1762652579.814636", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PowerInfer/SmallThinker-3B-Preview", + "developer": "PowerInfer", + "inference_platform": "unknown", + "id": "PowerInfer/SmallThinker-3B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6199650261306666 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4494922016660919 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27794561933534745 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3524791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017785904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json b/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json new file mode 100644 index 000000000..eb55bd369 --- /dev/null +++ b/data/hfopenllm_v2/PranavHarshan/MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PranavHarshan_MedNarra-X1/1762652579.815135", + "retrieved_timestamp": "1762652579.815136", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PranavHarshan/MedNarra-X1", + "developer": "PranavHarshan", + "inference_platform": "unknown", + "id": "PranavHarshan/MedNarra-X1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43384331351924005 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46371668179774184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34308510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json new file mode 100644 index 000000000..5768a1c66 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/1762652579.815407", + "retrieved_timestamp": "1762652579.815407", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5960595663949432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619637884426022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json new file mode 100644 index 000000000..216cca5fe --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/1762652579.81567", + "retrieved_timestamp": "1762652579.815671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5960595663949432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619637884426022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json new file mode 100644 index 000000000..0a862b2d0 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_32K-PoSE/1762652579.815889", + "retrieved_timestamp": "1762652579.8158898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3968991165662664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3471309425137119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42054166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.203125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json new file mode 100644 index 000000000..4fde5126f --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/1762652579.8160908", + "retrieved_timestamp": "1762652579.8160908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5975833011963811 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619637884426022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.114 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json new file mode 100644 index 000000000..71666bfdd --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/1762652579.8163", + "retrieved_timestamp": "1762652579.8163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5960595663949432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46213045510926887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42407291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.114 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json new file mode 100644 index 000000000..bff407760 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/1762652579.81651", + "retrieved_timestamp": "1762652579.816511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5960595663949432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619637884426022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json new file mode 100644 index 000000000..28b86b520 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/1762652579.816719", + "retrieved_timestamp": "1762652579.816719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5975833011963811 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46213045510926887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42407291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json new file mode 100644 index 000000000..c95aa6009 --- /dev/null +++ b/data/hfopenllm_v2/Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/1762652579.816936", + "retrieved_timestamp": "1762652579.816937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", + "developer": "Pretergeek", + "inference_platform": "unknown", + "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5960595663949432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619637884426022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 9.859 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json new file mode 100644 index 000000000..96d677d00 --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1-Instruct/1762652579.817848", + "retrieved_timestamp": "1762652579.8178492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PrimeIntellect/INTELLECT-1-Instruct", + "developer": "PrimeIntellect", + "inference_platform": "unknown", + "id": "PrimeIntellect/INTELLECT-1-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28698007801214714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3576875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10638297872340426 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json new file mode 100644 index 000000000..5bd185ec4 --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.8176599", + "retrieved_timestamp": "1762652579.817661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PrimeIntellect/INTELLECT-1", + "developer": "PrimeIntellect", + "inference_platform": "unknown", + "id": "PrimeIntellect/INTELLECT-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1757315035217667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27398007801214713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3752708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11203457446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json new file mode 100644 index 000000000..8105f4c3a --- /dev/null +++ b/data/hfopenllm_v2/PrimeIntellect/INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.817406", + "retrieved_timestamp": "1762652579.817406", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PrimeIntellect/INTELLECT-1", + "developer": "PrimeIntellect", + "inference_platform": "unknown", + "id": "PrimeIntellect/INTELLECT-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1757315035217667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27598007801214713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3339375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json b/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json new file mode 100644 index 000000000..1b0deb710 --- /dev/null +++ b/data/hfopenllm_v2/PuxAI/LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PuxAI_LUA_model/1762652579.818059", + "retrieved_timestamp": "1762652579.818059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PuxAI/LUA_model", + "developer": "PuxAI", + "inference_platform": "unknown", + "id": "PuxAI/LUA_model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22821336276634885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2876778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34838541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.386 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json b/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json new file mode 100644 index 000000000..5f1d57295 --- /dev/null +++ b/data/hfopenllm_v2/PygmalionAI/pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PygmalionAI_pygmalion-6b/1762652579.818316", + "retrieved_timestamp": "1762652579.8183172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PygmalionAI/pygmalion-6b", + "developer": "PygmalionAI", + "inference_platform": "unknown", + "id": "PygmalionAI/pygmalion-6b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20910406610016974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31988944643860034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3683541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11835106382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json b/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json new file mode 100644 index 000000000..730a14cc1 --- /dev/null +++ b/data/hfopenllm_v2/Q-bert/MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Q-bert_MetaMath-1B/1762652579.8185658", + "retrieved_timestamp": "1762652579.8185658", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Q-bert/MetaMath-1B", + "developer": "Q-bert", + "inference_platform": "unknown", + "id": "Q-bert/MetaMath-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5300391849182392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34506863677929517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1495179521276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json b/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json new file mode 100644 index 000000000..c1f8f1853 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_1up-14b/1762652579.818811", + "retrieved_timestamp": "1762652579.818812", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/1up-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/1up-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6888079185450161 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6920935635451656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4161631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4583333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406416223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json b/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json new file mode 100644 index 000000000..4d0f05d41 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Adamant-14B-sce/1762652579.819103", + "retrieved_timestamp": "1762652579.819104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Adamant-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Adamant-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6857604489421402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6858943778247303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45579166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371509308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json b/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json new file mode 100644 index 000000000..5bedfd990 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Alice-14B/1762652579.819317", + "retrieved_timestamp": "1762652579.819317", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Alice-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Alice-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6836371937570092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6937748567349198 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4569486404833837 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44794791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json b/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json new file mode 100644 index 000000000..6143cdc30 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Alien-CoT-14B-sce/1762652579.819517", + "retrieved_timestamp": "1762652579.8195179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Alien-CoT-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Alien-CoT-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07486358417886763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6395487523790632 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47852083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5170378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json b/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json new file mode 100644 index 000000000..807d541bc --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Aura-8B-Linear/1762652579.819725", + "retrieved_timestamp": "1762652579.819726", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Aura-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Aura-8B-Linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.794770098893159 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074298101934884 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800698138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json new file mode 100644 index 000000000..026e7f57a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.8199282", + "retrieved_timestamp": "1762652579.8199282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Casa-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Casa-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6653523761397536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6901033460664828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43102083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5425531914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json new file mode 100644 index 000000000..2ff50b9e9 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.820149", + "retrieved_timestamp": "1762652579.820149", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Casa-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Casa-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6718218770639681 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6891400252742456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4322916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408078457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json b/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json new file mode 100644 index 000000000..f509976d4 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Charlie-8B-Linear/1762652579.820338", + "retrieved_timestamp": "1762652579.820339", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Charlie-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Charlie-8B-Linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7380672172059026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141359215016831 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26510574018126887 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3572972074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json b/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json new file mode 100644 index 000000000..ea86c76f8 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Chromatic-8b-sce/1762652579.8205519", + "retrieved_timestamp": "1762652579.820553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Chromatic-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Chromatic-8b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5085074269604649 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5063171816307924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37549867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json b/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json new file mode 100644 index 000000000..4a7a55aa4 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Dyson-14b/1762652579.821013", + "retrieved_timestamp": "1762652579.821014", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Dyson-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Dyson-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5856682491345186 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6862902828866305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392749244712991 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5398936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json b/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json new file mode 100644 index 000000000..60152df1e --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Edu-14B-Linear/1762652579.821216", + "retrieved_timestamp": "1762652579.821216", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Edu-14B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Edu-14B-Linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6158182511292261 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6757820996225599 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43775000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.508560505319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json b/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json new file mode 100644 index 000000000..8bb386dd3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Fugazi14b/1762652579.8215911", + "retrieved_timestamp": "1762652579.821592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Fugazi14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Fugazi14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6997987561891337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6941017680723065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45455208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json b/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json new file mode 100644 index 000000000..d2dd0e58f --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GZA-14B-sce/1762652579.821823", + "retrieved_timestamp": "1762652579.821824", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/GZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/GZA-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6274086091570367 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6686539892126272 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47205438066465255 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4284791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523188164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json b/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json new file mode 100644 index 000000000..449538b4d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Geedorah-14B/1762652579.822031", + "retrieved_timestamp": "1762652579.822032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Geedorah-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Geedorah-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6872841837435781 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6964189914061528 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45467708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json b/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json new file mode 100644 index 000000000..4abeac26d --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GivingTree-8b-sce/1762652579.8222332", + "retrieved_timestamp": "1762652579.8222342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/GivingTree-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/GivingTree-8b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5006139266036339 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5040482025572203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json b/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json new file mode 100644 index 000000000..7e6adf266 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_GuiltySpark-14B-ties/1762652579.822431", + "retrieved_timestamp": "1762652579.822432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/GuiltySpark-14B-ties", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/GuiltySpark-14B-ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6854357549080883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6914302574038697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38368580060422963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4557291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json b/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json new file mode 100644 index 000000000..709ddae21 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Halo-14B-sce/1762652579.822633", + "retrieved_timestamp": "1762652579.822633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Halo-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Halo-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6753691316817156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6875692490185378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42900302114803623 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44007291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5376496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json b/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json new file mode 100644 index 000000000..dbf0fe0b3 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Heretic1.5b/1762652579.8228369", + "retrieved_timestamp": "1762652579.8228369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Heretic1.5b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Heretic1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20615633186611523 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3529180801121154 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24395770392749244 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3511458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17278922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.73 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json b/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json new file mode 100644 index 000000000..ecb6e8528 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Hyde-14b-sce/1762652579.823039", + "retrieved_timestamp": "1762652579.823039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Hyde-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Hyde-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6715470507143269 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6885164810743584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27341389728096677 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5300033244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json b/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json new file mode 100644 index 000000000..c29cc7b9a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Imagine-v0.5-16bit/1762652579.823242", + "retrieved_timestamp": "1762652579.823243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Imagine-v0.5-16bit", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Imagine-v0.5-16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2758990589413866 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6769135492947932 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.535405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json b/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json new file mode 100644 index 000000000..751705fff --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Imbue-14b/1762652579.8234398", + "retrieved_timestamp": "1762652579.8234408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Imbue-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Imbue-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199725616918665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6845292092854045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41672916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5402260638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json b/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json new file mode 100644 index 000000000..b68872214 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Insom/1762652579.823634", + "retrieved_timestamp": "1762652579.8236349", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Insom", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Insom" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.68183863260593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6881456689046391 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498322147651007 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43114583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5352393617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json b/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json new file mode 100644 index 000000000..a04e07853 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_InspectorDeck-14B-sce/1762652579.8238342", + "retrieved_timestamp": "1762652579.8238342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/InspectorDeck-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/InspectorDeck-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32408454013129606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6668480318764974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3164652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5260970744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json b/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json new file mode 100644 index 000000000..87a93a917 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Jekyl-8b-sce/1762652579.82404", + "retrieved_timestamp": "1762652579.824041", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Jekyl-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Jekyl-8b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46968931324441365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4993588236391566 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41966666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json b/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json new file mode 100644 index 000000000..9a646a47b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Jigsaw-14B-Linear/1762652579.824291", + "retrieved_timestamp": "1762652579.824291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Jigsaw-14B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Jigsaw-14B-Linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6480416406246536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6864625931836906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26510574018126887 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44826041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5233543882978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json b/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json new file mode 100644 index 000000000..c4a541ffc --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Katana-8b-sce/1762652579.8246028", + "retrieved_timestamp": "1762652579.8246038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Katana-8b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Katana-8b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5107304175144174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074684221457483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1510574018126888 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4037604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3770777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json b/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json new file mode 100644 index 000000000..17bf6a2c1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Knot-CoT-14B-sce/1762652579.8248682", + "retrieved_timestamp": "1762652579.8248692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Knot-CoT-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Knot-CoT-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4831779677921249 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6615610657544672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3995468277945619 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515375664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json b/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json new file mode 100644 index 000000000..aaa0f4b34 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Lineage-14B/1762652579.82509", + "retrieved_timestamp": "1762652579.8250911", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Lineage-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Lineage-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7070428684778609 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6933789516730196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244712990936556 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3598993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4597291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json b/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json new file mode 100644 index 000000000..503da1ce6 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Loke-14B-sce/1762652579.825529", + "retrieved_timestamp": "1762652579.82553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Loke-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Loke-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6847863668399845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6923902176707362 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3904833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46366666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5401429521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json b/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json new file mode 100644 index 000000000..901ed05e8 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_MFDOOM-14B/1762652579.825741", + "retrieved_timestamp": "1762652579.825742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/MFDOOM-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/MFDOOM-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6736204382150472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6916400252742457 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5264350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43765625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5425531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json b/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json new file mode 100644 index 000000000..f7e9cc965 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_MFGRIMM-14B/1762652579.8259468", + "retrieved_timestamp": "1762652579.825948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/MFGRIMM-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/MFGRIMM-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6894074389287091 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.69087746819662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5060422960725075 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43613541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5416389627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json b/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json new file mode 100644 index 000000000..157f65994 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mithril-14B-sce/1762652579.826359", + "retrieved_timestamp": "1762652579.82636", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Mithril-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Mithril-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6957772044841022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6925969240705362 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3821752265861027 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5403091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json b/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json new file mode 100644 index 000000000..5f2beee58 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mononoke-14B-sce/1762652579.8265631", + "retrieved_timestamp": "1762652579.826564", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Mononoke-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Mononoke-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3502129904209719 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6744431226588331 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4154583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5297539893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json b/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json new file mode 100644 index 000000000..f5ed2ea44 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Motion-8B-Linear/1762652579.826771", + "retrieved_timestamp": "1762652579.826771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Motion-8B-Linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Motion-8B-Linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7685917809190725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084252652465131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36060416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3784906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json b/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json new file mode 100644 index 000000000..7040b3653 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Mouse-9B/1762652579.826978", + "retrieved_timestamp": "1762652579.826978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Mouse-9B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Mouse-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1324917884546337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29789470527601253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3469583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11386303191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.207 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json b/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json new file mode 100644 index 000000000..70994c6af --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Nova-14b-sce/1762652579.827177", + "retrieved_timestamp": "1762652579.827178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Nova-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Nova-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7021968377239058 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6935261478148286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4161631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4570625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413065159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json b/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json new file mode 100644 index 000000000..8cf9a071a --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_NovaScotia-14b-stock/1762652579.827381", + "retrieved_timestamp": "1762652579.827381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/NovaScotia-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/NovaScotia-14b-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6787412953186434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6935261478148286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44934375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408909574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json new file mode 100644 index 000000000..f6982052b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14b-sce/1762652579.827807", + "retrieved_timestamp": "1762652579.827808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/ODB-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/ODB-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7015973173402128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6941928144814953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.411631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4570625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411402925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json new file mode 100644 index 000000000..c5d36571c --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14B-sce/1762652579.827594", + "retrieved_timestamp": "1762652579.827595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/ODB-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/ODB-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292235712354331 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6558922017209644 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39288541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206948138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json b/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json new file mode 100644 index 000000000..9a3a7a793 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Oasis-14B-ties/1762652579.827992", + "retrieved_timestamp": "1762652579.8279932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Oasis-14B-ties", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Oasis-14B-ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6936539492989712 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6914976731342066 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4570625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5404753989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json b/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json new file mode 100644 index 000000000..1c209a31b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Origami-14B-sce/1762652579.828193", + "retrieved_timestamp": "1762652579.8281941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Origami-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Origami-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3259329689667859 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6620277470720752 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40348958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244348404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json b/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json new file mode 100644 index 000000000..a76444f99 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Ponder-14B-linear/1762652579.8290088", + "retrieved_timestamp": "1762652579.8290088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Ponder-14B-linear", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Ponder-14B-linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6906064796960952 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6942602302118323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45576041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408078457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json b/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json new file mode 100644 index 000000000..962a40719 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_RZA-14B-sce/1762652579.829216", + "retrieved_timestamp": "1762652579.829216", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/RZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/RZA-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773578549360142 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6685829139021245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41133333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538314494680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json b/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json new file mode 100644 index 000000000..f438e496f --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Rosemary-14b/1762652579.829469", + "retrieved_timestamp": "1762652579.82947", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Rosemary-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Rosemary-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6915306941138402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6955261478148286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396442819148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json b/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json new file mode 100644 index 000000000..4817ea5e2 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Rune-14b/1762652579.829681", + "retrieved_timestamp": "1762652579.8296819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Rune-14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Rune-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7015973173402128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6937489642141156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45845921450151056 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45328125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411402925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json b/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json new file mode 100644 index 000000000..bf5fc7245 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_SZA-14B-sce/1762652579.829889", + "retrieved_timestamp": "1762652579.82989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/SZA-14B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/SZA-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5659095644002359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6888749072998727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241691842900302 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5353224734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json b/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json new file mode 100644 index 000000000..12ec012b9 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Sake-20b/1762652579.830092", + "retrieved_timestamp": "1762652579.8300931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Sake-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Sake-20b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6692741924759638 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6769823539837527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44940625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5391456117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json b/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json new file mode 100644 index 000000000..5fdb8fc04 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Spok-14b-sce/1762652579.830291", + "retrieved_timestamp": "1762652579.830292", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Spok-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Spok-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6681748870773991 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6899172301380289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719033232628399 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5297539893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json b/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json new file mode 100644 index 000000000..513112495 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Sumatra-20b/1762652579.830487", + "retrieved_timestamp": "1762652579.830488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Sumatra-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Sumatra-20b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.673795529195867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6855416597047258 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4560104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414727393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json b/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json new file mode 100644 index 000000000..e6ceb3c3b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_SuperNova14b/1762652579.830682", + "retrieved_timestamp": "1762652579.830683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/SuperNova14b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/SuperNova14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.707642388861554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6937489642141156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395770392749245 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4545208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543467420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json b/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json new file mode 100644 index 000000000..ac3d8cee1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_TB0-8B-sce/1762652579.8308768", + "retrieved_timestamp": "1762652579.8308768", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/TB0-8B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/TB0-8B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5107304175144174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074684221457483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1510574018126888 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4037604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3770777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json b/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json new file mode 100644 index 000000000..5ed9a174b --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_TBL-8B-sce/1762652579.831074", + "retrieved_timestamp": "1762652579.831075", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/TBL-8B-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/TBL-8B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45809895521660304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5008187839060233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42363541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3689328457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json b/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json new file mode 100644 index 000000000..0029072e0 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-20b/1762652579.8314738", + "retrieved_timestamp": "1762652579.831475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Venti-20b", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Venti-20b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6641034676879568 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6901240010129452 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3391238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44797916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json b/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json new file mode 100644 index 000000000..9130e62f4 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-Blend-sce/1762652579.831816", + "retrieved_timestamp": "1762652579.8318179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Venti-Blend-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Venti-Blend-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6879335718116819 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6842921511560114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40558912386706947 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43892708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413896276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.475 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json b/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json new file mode 100644 index 000000000..05a5f424c --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Vine-14b-sce/1762652579.8321972", + "retrieved_timestamp": "1762652579.832198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Vine-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Vine-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.673345611865406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6891400252742456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007552870090635 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4322916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408078457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json b/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json new file mode 100644 index 000000000..f398b89d8 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Wendy-14B/1762652579.832468", + "retrieved_timestamp": "1762652579.832469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Wendy-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Wendy-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6772175605172055 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6957587467354328 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48338368580060426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543467420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json b/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json new file mode 100644 index 000000000..d0bdfb6e1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Wu-14b-sce/1762652579.832721", + "retrieved_timestamp": "1762652579.832722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Wu-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/Wu-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6718218770639681 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6885164810743585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26132930513595165 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41142708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5292553191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json b/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json new file mode 100644 index 000000000..4e3c94ba5 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_bloom-14b-stock/1762652579.8329449", + "retrieved_timestamp": "1762652579.8329458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/bloom-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/bloom-14b-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6575087434673332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6877869223612597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4811178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43095833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5373171542553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json b/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json new file mode 100644 index 000000000..595e209ea --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_caramel-14B/1762652579.833162", + "retrieved_timestamp": "1762652579.833163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/caramel-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/caramel-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6744947849483814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6918707471458787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47129909365558914 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json b/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json new file mode 100644 index 000000000..07b3aaec1 --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_mocha-14B/1762652579.833622", + "retrieved_timestamp": "1762652579.833623", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/mocha-14B", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/mocha-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5893152391210876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6894730595527842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5264350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4271770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383976063829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json b/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json new file mode 100644 index 000000000..f92e34bff --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_mosaic-14b-sce/1762652579.8338351", + "retrieved_timestamp": "1762652579.833836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/mosaic-14b-sce", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/mosaic-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6875590100932193 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6907089244809823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4025679758308157 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45579166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396442819148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json b/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json new file mode 100644 index 000000000..27e67e8ba --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_tesseract-14b-stock/1762652579.834054", + "retrieved_timestamp": "1762652579.834055", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/tesseract-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/tesseract-14b-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5847939024011845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6880007346047826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5143504531722054 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42323958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5388962765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json b/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json new file mode 100644 index 000000000..21641aeda --- /dev/null +++ b/data/hfopenllm_v2/Quazim0t0/time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_time-14b-stock/1762652579.834393", + "retrieved_timestamp": "1762652579.8343942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/time-14b-stock", + "developer": "Quazim0t0", + "inference_platform": "unknown", + "id": "Quazim0t0/time-14b-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6699235805440675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6897025970028126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5083081570996979 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43232291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json new file mode 100644 index 000000000..a76aa8368 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B-Chat/1762652579.835679", + "retrieved_timestamp": "1762652579.83568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-0.5B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-0.5B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18072713732895385 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3166662152036714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3837083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12125997340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.62 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json new file mode 100644 index 000000000..14f97dd40 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B-Chat/1762652579.836214", + "retrieved_timestamp": "1762652579.836215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-1.8B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-1.8B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20190982149585324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255912875735599 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42596875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18035239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json b/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json new file mode 100644 index 000000000..5e6f09846 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B-Chat/1762652579.836649", + "retrieved_timestamp": "1762652579.836649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-110B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-110B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5938864435254014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6183800385588633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45216666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48246343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 111.21 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json b/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json new file mode 100644 index 000000000..9d8b6c0c9 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B-Chat/1762652579.837058", + "retrieved_timestamp": "1762652579.837059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-14B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-14B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47680820223673187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5228587510703555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43997916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.167 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json b/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json new file mode 100644 index 000000000..e31bf21b1 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B-Chat/1762652579.8374798", + "retrieved_timestamp": "1762652579.8374798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-32B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-32B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5532199009738605 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6066899757930234 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4159791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457280585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json b/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json new file mode 100644 index 000000000..fb295ebc7 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B-Chat/1762652579.837912", + "retrieved_timestamp": "1762652579.837912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-4B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-4B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31566576683200576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40055485611486114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23961103723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.95 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json b/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json new file mode 100644 index 000000000..eacf6abad --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B-Chat/1762652579.838321", + "retrieved_timestamp": "1762652579.838322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-7B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-7B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43711574178734647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4510053116521351 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37790624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2951296542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.721 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json new file mode 100644 index 000000000..49af3c35c --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B-Chat/1762652579.838758", + "retrieved_timestamp": "1762652579.838758", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37953851336675576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4272088620635824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38987499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29230385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json b/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json new file mode 100644 index 000000000..1bfcb1fac --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B-Instruct/1762652579.839177", + "retrieved_timestamp": "1762652579.839178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-0.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22466610814860127 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31725179384863494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33527083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15309175531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json b/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json new file mode 100644 index 000000000..147eebc5c --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B-Instruct/1762652579.839607", + "retrieved_timestamp": "1762652579.839607", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3371232773485463 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3852232408376059 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42928125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25008311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json new file mode 100644 index 000000000..c695fade2 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B-Instruct/1762652579.84003", + "retrieved_timestamp": "1762652579.840031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-57B-A14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-57B-A14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6337783747124297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5887606963532052 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28172205438066467 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43613541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45752992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 57.409 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json b/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json new file mode 100644 index 000000000..4cfc17031 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B-Instruct/1762652579.840446", + "retrieved_timestamp": "1762652579.840447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7989168738945996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.697730968386067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4176737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4560104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5403091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json b/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json new file mode 100644 index 000000000..ed17e358a --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B-Instruct/1762652579.84092", + "retrieved_timestamp": "1762652579.84092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5679075962889577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5544781563793189 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38472406914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json b/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json new file mode 100644 index 000000000..5bbbd45f2 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-72B-Instruct/1762652579.841145", + "retrieved_timestamp": "1762652579.8411462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-Math-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-Math-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.569381463405985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.634337660025181 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5536253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45169791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42727726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json new file mode 100644 index 000000000..e4648e325 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-72B-Instruct/1762652579.841569", + "retrieved_timestamp": "1762652579.8415701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-VL-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-VL-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5982326892644849 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6946287292338682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441087613293053 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5717253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2VLForConditionalGeneration", + "params_billions": 73.406 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json b/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json new file mode 100644 index 000000000..888c91fe2 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-7B-Instruct/1762652579.841773", + "retrieved_timestamp": "1762652579.841774", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-VL-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-VL-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599218961245052 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5464507159069989 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1986404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2VLForConditionalGeneration", + "params_billions": 8.291 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json new file mode 100644 index 000000000..edb8fb138 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842413", + "retrieved_timestamp": "1762652579.8424141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-0.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31529120511354314 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3321916429549138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17195811170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json new file mode 100644 index 000000000..4879eaba0 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842189", + "retrieved_timestamp": "1762652579.84219", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-0.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-0.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.307122878407071 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340729214937266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33288541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16971409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json new file mode 100644 index 000000000..791b75f7b --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B-Instruct/1762652579.842835", + "retrieved_timestamp": "1762652579.842836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4475569267321817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4288982740422907 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3663125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27992021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json new file mode 100644 index 000000000..f93c2aea9 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct-1M/1762652579.843473", + "retrieved_timestamp": "1762652579.843473", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-14B-Instruct-1M", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-14B-Instruct-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8413564896696322 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6198222551365405 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302114803625377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4849567819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json new file mode 100644 index 000000000..518abf00a --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct/1762652579.843263", + "retrieved_timestamp": "1762652579.843264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8157776920792386 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6390453705906222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4100625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4904421542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json b/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json new file mode 100644 index 000000000..6a91b08d6 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B-Instruct/1762652579.843922", + "retrieved_timestamp": "1762652579.843922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-32B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8346121623957765 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6912525080134339 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6253776435045317 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42612500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json b/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json new file mode 100644 index 000000000..55f7a9828 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B-Instruct/1762652579.844352", + "retrieved_timestamp": "1762652579.844352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-3B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6474919879253713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.469276665604885 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3678247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3254654255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json b/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json new file mode 100644 index 000000000..0c5cdbb69 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B-Instruct/1762652579.844789", + "retrieved_timestamp": "1762652579.844789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.863837949972739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7272747321744824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5981873111782477 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42060416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5625831117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json new file mode 100644 index 000000000..8d7e125c3 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct-1M/1762652579.845428", + "retrieved_timestamp": "1762652579.845428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-7B-Instruct-1M", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-7B-Instruct-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7447616767953474 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5403941270576822 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4335347432024169 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40869791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35048204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json new file mode 100644 index 000000000..d98156e9d --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct/1762652579.845207", + "retrieved_timestamp": "1762652579.8452082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7585251576926999 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394231968299095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286901595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json new file mode 100644 index 000000000..68048525a --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B-Instruct/1762652579.846175", + "retrieved_timestamp": "1762652579.846175", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-14B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6907560827493273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6140296423661326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3939494680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json new file mode 100644 index 000000000..729a5f684 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B-Instruct/1762652579.846655", + "retrieved_timestamp": "1762652579.846655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-32B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7265267268625026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6625222222405129 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954682779456193 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4385833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json new file mode 100644 index 000000000..4b16f461b --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.847122", + "retrieved_timestamp": "1762652579.847123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6101477413263474 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007976986224548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351894946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json new file mode 100644 index 000000000..982362509 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.8473449", + "retrieved_timestamp": "1762652579.8473458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6147189457306613 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4999048550311305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4099375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33543882978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json new file mode 100644 index 000000000..4fecfdea9 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-1.5B-Instruct/1762652579.84755", + "retrieved_timestamp": "1762652579.84755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Math-1.5B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Math-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1855731680829089 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37515353898426174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2628398791540785 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3685416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1801030585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json new file mode 100644 index 000000000..ee7447100 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-72B-Instruct/1762652579.847774", + "retrieved_timestamp": "1762652579.847775", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Math-72B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Math-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003466358151926 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6452266637803764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6238670694864048 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44727083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4812167553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json new file mode 100644 index 000000000..fcb9a0433 --- /dev/null +++ b/data/hfopenllm_v2/Qwen/Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B-Instruct/1762652579.848376", + "retrieved_timestamp": "1762652579.848377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Math-7B-Instruct", + "developer": "Qwen", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Math-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26358395723347383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438762734452786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5808157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3647291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2819980053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json b/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json new file mode 100644 index 000000000..7aced4c7c --- /dev/null +++ b/data/hfopenllm_v2/RDson/WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RDson_WomboCombo-R1-Coder-14B-Preview/1762652579.848609", + "retrieved_timestamp": "1762652579.8486102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RDson/WomboCombo-R1-Coder-14B-Preview", + "developer": "RDson", + "inference_platform": "unknown", + "id": "RDson/WomboCombo-R1-Coder-14B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.628557782240012 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6392098699331132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5989425981873112 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4843854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5167885638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json b/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json new file mode 100644 index 000000000..4d54c773d --- /dev/null +++ b/data/hfopenllm_v2/RLHFlow/LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RLHFlow_LLaMA3-iterative-DPO-final/1762652579.849687", + "retrieved_timestamp": "1762652579.849688", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RLHFlow/LLaMA3-iterative-DPO-final", + "developer": "RLHFlow", + "inference_platform": "unknown", + "id": "RLHFlow/LLaMA3-iterative-DPO-final" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.53401086893886 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058257182733729 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3672708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32571476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json b/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json new file mode 100644 index 000000000..912a3f252 --- /dev/null +++ b/data/hfopenllm_v2/RWKV/rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RWKV_rwkv-raven-14b/1762652579.849975", + "retrieved_timestamp": "1762652579.849976", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RWKV/rwkv-raven-14b", + "developer": "RWKV", + "inference_platform": "unknown", + "id": "RWKV/rwkv-raven-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07683723631076655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3307041176552897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22902684563758388 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11502659574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "RwkvForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json b/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json new file mode 100644 index 000000000..0b1c3d3ee --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-2.0-mini-instruct/1762652579.850244", + "retrieved_timestamp": "1762652579.850244", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rakuten/RakutenAI-2.0-mini-instruct", + "developer": "Rakuten", + "inference_platform": "unknown", + "id": "Rakuten/RakutenAI-2.0-mini-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6793906833867471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2867197270809481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 1.535 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json new file mode 100644 index 000000000..e8814cac3 --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B-chat/1762652579.850715", + "retrieved_timestamp": "1762652579.8507159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rakuten/RakutenAI-7B-chat", + "developer": "Rakuten", + "inference_platform": "unknown", + "id": "Rakuten/RakutenAI-7B-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26855521128383797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316204035758174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37895833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2798371010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.373 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json b/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json new file mode 100644 index 000000000..6de5c6673 --- /dev/null +++ b/data/hfopenllm_v2/Rakuten/RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B/1762652579.8505", + "retrieved_timestamp": "1762652579.850501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rakuten/RakutenAI-7B", + "developer": "Rakuten", + "inference_platform": "unknown", + "id": "Rakuten/RakutenAI-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555971488982566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43149052613615435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37381250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28773271276595747 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.373 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json b/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json new file mode 100644 index 000000000..e7a2835a3 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_L3-Pneuma-8B/1762652579.85093", + "retrieved_timestamp": "1762652579.850931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/L3-Pneuma-8B", + "developer": "Replete-AI", + "inference_platform": "unknown", + "id": "Replete-AI/L3-Pneuma-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24132745559559746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4908680380935449 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4105208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3175698138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json b/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json new file mode 100644 index 000000000..88c4a6c75 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_L3.1-Pneuma-8B/1762652579.851203", + "retrieved_timestamp": "1762652579.8512042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/L3.1-Pneuma-8B", + "developer": "Replete-AI", + "inference_platform": "unknown", + "id": "Replete-AI/L3.1-Pneuma-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.707642388861554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504990389092237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36909906914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json b/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json new file mode 100644 index 000000000..6722fef6a --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/1762652579.8514109", + "retrieved_timestamp": "1762652579.851412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", + "developer": "Replete-AI", + "inference_platform": "unknown", + "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6915306941138402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48702618293318983 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36339583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3390957446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json new file mode 100644 index 000000000..c36eb5212 --- /dev/null +++ b/data/hfopenllm_v2/Replete-AI/Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Instruct-8b-Merged/1762652579.851615", + "retrieved_timestamp": "1762652579.851616", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-Coder-Instruct-8b-Merged", + "developer": "Replete-AI", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-Coder-Instruct-8b-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5387571643239937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4461693860075828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051861702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json b/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json new file mode 100644 index 000000000..db4cf77b0 --- /dev/null +++ b/data/hfopenllm_v2/RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652579.853197", + "retrieved_timestamp": "1762652579.853197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", + "developer": "RezVortex", + "inference_platform": "unknown", + "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6858103166265509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46189139399865614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36302083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3143284574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json b/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json new file mode 100644 index 000000000..8ca85c344 --- /dev/null +++ b/data/hfopenllm_v2/RezVortex/Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RezVortex_Jajuka-3b/1762652579.85344", + "retrieved_timestamp": "1762652579.853441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RezVortex/Jajuka-3b", + "developer": "RezVortex", + "inference_platform": "unknown", + "id": "RezVortex/Jajuka-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6925047762159957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4593872338446621 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3670833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3137466755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json new file mode 100644 index 000000000..da628214c --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-DARE-0/1762652579.8536398", + "retrieved_timestamp": "1762652579.853641", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ro-xe/FMixIA-7B-DARE-0", + "developer": "Ro-xe", + "inference_platform": "unknown", + "id": "Ro-xe/FMixIA-7B-DARE-0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341256754300811 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5035332799973222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45448958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3016123670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json new file mode 100644 index 000000000..886b27259 --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-SLERP-27/1762652579.853882", + "retrieved_timestamp": "1762652579.8538828", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ro-xe/FMixIA-7B-SLERP-27", + "developer": "Ro-xe", + "inference_platform": "unknown", + "id": "Ro-xe/FMixIA-7B-SLERP-27" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765409114482905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5150591725181265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44115624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30078125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json new file mode 100644 index 000000000..a5317872a --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-TIES-1/1762652579.8540852", + "retrieved_timestamp": "1762652579.8540852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ro-xe/FMixIA-7B-TIES-1", + "developer": "Ro-xe", + "inference_platform": "unknown", + "id": "Ro-xe/FMixIA-7B-TIES-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34529160405501846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5091539642456672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2992021276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json b/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json new file mode 100644 index 000000000..9a8a5c6d5 --- /dev/null +++ b/data/hfopenllm_v2/Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/1762652579.8542862", + "retrieved_timestamp": "1762652579.8542871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", + "developer": "Ro-xe", + "inference_platform": "unknown", + "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19401632113902223 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5087851148631056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41703124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36569148936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.141 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json b/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json new file mode 100644 index 000000000..2e8ae1bb0 --- /dev/null +++ b/data/hfopenllm_v2/RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/1762652579.85476", + "retrieved_timestamp": "1762652579.85476", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", + "developer": "RubielLabarta", + "inference_platform": "unknown", + "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4378903531518593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206958722481815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087599734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json b/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json new file mode 100644 index 000000000..2c7eab277 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Evil-Alpaca-3B-L3.2/1762652579.8550148", + "retrieved_timestamp": "1762652579.8550148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/Evil-Alpaca-3B-L3.2", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "id": "SaisExperiments/Evil-Alpaca-3B-L3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32510848991786234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4340757699220565 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2621343085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json new file mode 100644 index 000000000..8445c054b --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Opus-Instruct/1762652579.855459", + "retrieved_timestamp": "1762652579.8554602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/Gemma-2-2B-Opus-Instruct", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.474959773401242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4292846281445681 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4056875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2650432180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json b/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json new file mode 100644 index 000000000..80591659d --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Not-So-Small-Alpaca-24B/1762652579.855924", + "retrieved_timestamp": "1762652579.855925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/Not-So-Small-Alpaca-24B", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "id": "SaisExperiments/Not-So-Small-Alpaca-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6243611395541607 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338637679203099 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42816666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36943151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json b/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json new file mode 100644 index 000000000..4f71c9a34 --- /dev/null +++ b/data/hfopenllm_v2/SaisExperiments/QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_QwOwO-7B-V1/1762652579.856126", + "retrieved_timestamp": "1762652579.856126", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/QwOwO-7B-V1", + "developer": "SaisExperiments", + "inference_platform": "unknown", + "id": "SaisExperiments/QwOwO-7B-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45562551806983254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431230107025949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859516616314199 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38348958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42237367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json b/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json new file mode 100644 index 000000000..ac7cdf508 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Anemoi-3B/1762652579.856576", + "retrieved_timestamp": "1762652579.856576", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Anemoi-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Anemoi-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3803629924156793 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4921954661921298 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765791223404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json b/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json new file mode 100644 index 000000000..2e0682aaf --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Euphrates-14B/1762652579.856813", + "retrieved_timestamp": "1762652579.8568141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Euphrates-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Euphrates-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26468326263203856 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6137691668744961 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45157291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255152925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json b/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json new file mode 100644 index 000000000..103a40780 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Magro-7B-v1.1/1762652579.857256", + "retrieved_timestamp": "1762652579.857256", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Magro-7B-v1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Magro-7B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1204016454119514 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41790625208343796 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27642952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json b/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json new file mode 100644 index 000000000..1d612a155 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-3B/1762652579.857454", + "retrieved_timestamp": "1762652579.857455", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Neptuno-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Neptuno-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42962229107656574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48335808848564965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40019791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3773271276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json b/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json new file mode 100644 index 000000000..7fd98b556 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-Alpha/1762652579.857697", + "retrieved_timestamp": "1762652579.857698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Neptuno-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Neptuno-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779649108809071 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49247749379461303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767453457446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json b/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json new file mode 100644 index 000000000..927bf0e35 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Oxyge1-33B/1762652579.8578959", + "retrieved_timestamp": "1762652579.857897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Oxyge1-33B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Oxyge1-33B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4548265269484966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7033278292161169 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007812500000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5909242021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json b/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json new file mode 100644 index 000000000..9596418cf --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Qwen2.5-1B-Instruct/1762652579.858331", + "retrieved_timestamp": "1762652579.858331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Qwen2.5-1B-Instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Qwen2.5-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17513198313807365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30271528035563927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33688541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12134308510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.988 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json b/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json new file mode 100644 index 000000000..3a8ba2f4a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-0.5B/1762652579.858787", + "retrieved_timestamp": "1762652579.858787", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-0.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24247662867857286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33055365550588683 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31958333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18907912234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json new file mode 100644 index 000000000..a99b3ddb8 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha-1.1/1762652579.859199", + "retrieved_timestamp": "1762652579.859199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-1.5B-Alpha-1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-1.5B-Alpha-1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3439429602344003 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4243160272518483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42391666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.296625664893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json new file mode 100644 index 000000000..0b5ea9300 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha/1762652579.858988", + "retrieved_timestamp": "1762652579.858989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-1.5B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-1.5B-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3448671746521452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4240819448548446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961269946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json b/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json new file mode 100644 index 000000000..b5152c089 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.7B/1762652579.859416", + "retrieved_timestamp": "1762652579.8594172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-1.7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-1.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17762980004166723 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2934008926922806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39641666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11328125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.684 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json b/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json new file mode 100644 index 000000000..8d3c36205 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-14B/1762652579.8596292", + "retrieved_timestamp": "1762652579.85963", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5494233079340594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6536135646865123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.476625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5380651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json b/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json new file mode 100644 index 000000000..dc5b5c94e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2.4B/1762652579.859841", + "retrieved_timestamp": "1762652579.859841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-2.4B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-2.4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28042039566128985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.349012395546882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36990624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1858377659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.432 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json b/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json new file mode 100644 index 000000000..79d30ce36 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-24B-Alpha/1762652579.860041", + "retrieved_timestamp": "1762652579.860041", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-24B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-24B-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3206370208823699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6080838080485248 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25302114803625375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45947916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48570478723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 24.125 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json b/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json new file mode 100644 index 000000000..dc3b675e0 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B-V1.1/1762652579.860439", + "retrieved_timestamp": "1762652579.860439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-2B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-2B-V1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3977235956151899 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39838417813569243 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42993750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21243351063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json b/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json new file mode 100644 index 000000000..71dc83a6b --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B/1762652579.8602371", + "retrieved_timestamp": "1762652579.860238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-2B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21507378200951255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29364597509285106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35641666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11868351063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json b/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json new file mode 100644 index 000000000..646383ee4 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-3.7B/1762652579.860638", + "retrieved_timestamp": "1762652579.8606389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-3.7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-3.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10776184966998675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3393045259885476 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36171875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1505152925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.783 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json b/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json new file mode 100644 index 000000000..e8dc13ef4 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-4B/1762652579.8608499", + "retrieved_timestamp": "1762652579.860851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-4B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077403511571519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4885743296577029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4779583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json b/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json new file mode 100644 index 000000000..2a10c73b3 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7.5B/1762652579.861058", + "retrieved_timestamp": "1762652579.861058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-7.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-7.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42232831110342783 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367364587851736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43988541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951130319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json new file mode 100644 index 000000000..41d4bc8e6 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1-Multilingal/1762652579.861463", + "retrieved_timestamp": "1762652579.861464", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-7B-V1.1-Multilingal", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-7B-V1.1-Multilingal" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19494053555676716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2919597646466201 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.362125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11369680851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json new file mode 100644 index 000000000..cea7e3075 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1/1762652579.861262", + "retrieved_timestamp": "1762652579.861263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-7B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-7B-V1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4702888336281067 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418885259534293 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44106249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.441156914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json b/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json new file mode 100644 index 000000000..d5d7ba32a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B-V1.1/1762652579.8618612", + "retrieved_timestamp": "1762652579.861862", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-8B-V1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-8B-V1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620706392372239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5120768392487195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20694864048338368 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4231216755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json b/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json new file mode 100644 index 000000000..136152a74 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B/1762652579.861662", + "retrieved_timestamp": "1762652579.8616629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6534871917623019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5281955607099067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2537764350453172 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4079791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266123670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.548 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json b/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json new file mode 100644 index 000000000..04d3a669e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-900M/1762652579.862072", + "retrieved_timestamp": "1762652579.8620732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-900M", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-900M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2410027615615456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31692036321713823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35945833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.899 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json b/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json new file mode 100644 index 000000000..1f4af8d67 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJT-Moe2x7.5B/1762652579.862277", + "retrieved_timestamp": "1762652579.862278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJT-Moe2x7.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJT-Moe2x7.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41166216749336204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370697921185069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43988541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3953623670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.401 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json b/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json new file mode 100644 index 000000000..f591f0123 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-2/1762652579.8624809", + "retrieved_timestamp": "1762652579.8624818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJTPass-2", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJTPass-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24002867945939 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33022032217255354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1901595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json b/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json new file mode 100644 index 000000000..3021202b3 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-4/1762652579.8627222", + "retrieved_timestamp": "1762652579.8627222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJTPass-4", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJTPass-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19129354557019818 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2963644180215358 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38981249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10829454787234043 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.167 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json b/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json new file mode 100644 index 000000000..034438e4d --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-5/1762652579.862921", + "retrieved_timestamp": "1762652579.862922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SJTPass-5", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SJTPass-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24247662867857286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31029599812555747 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13272938829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.809 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json b/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json new file mode 100644 index 000000000..3f6833696 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba-Passthrough-2/1762652579.863117", + "retrieved_timestamp": "1762652579.8631182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba-Passthrough-2", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba-Passthrough-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16913677930114318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36724803467499195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3844479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20769614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.087 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json b/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json new file mode 100644 index 000000000..768acf6d2 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-1.8B/1762652579.863334", + "retrieved_timestamp": "1762652579.863334", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba1-1.8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba1-1.8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3332768166243345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4147375470428282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4238854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2925531914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json b/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json new file mode 100644 index 000000000..eb0224d69 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-7B/1762652579.863542", + "retrieved_timestamp": "1762652579.863542", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba1-7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45847351693506566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489063327459239 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36631419939577037 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47932291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json b/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json new file mode 100644 index 000000000..49b8aef61 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-1.5B/1762652579.8637571", + "retrieved_timestamp": "1762652579.863758", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba1.5-1.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba1.5-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3332768166243345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4147375470428282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4238854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2925531914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json b/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json new file mode 100644 index 000000000..4d4c3ce48 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-Pro-3B/1762652579.863965", + "retrieved_timestamp": "1762652579.863966", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba1.5-Pro-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba1.5-Pro-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23860468002677343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3622910501405146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44054166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19581117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.9 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json b/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json new file mode 100644 index 000000000..7e0b77da3 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-14B-Preview/1762652579.864167", + "retrieved_timestamp": "1762652579.864168", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba2-14B-Preview", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba2-14B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4721871301480073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.649628096691823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4781458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383976063829787 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json b/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json new file mode 100644 index 000000000..ea9b643bf --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-3B/1762652579.864372", + "retrieved_timestamp": "1762652579.864373", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saba2-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saba2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28651533486704167 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28011877359000464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2617449664429531 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39269791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12101063829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json b/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json new file mode 100644 index 000000000..61fe6c422 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Sailor-japanese/1762652579.864587", + "retrieved_timestamp": "1762652579.864588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Sailor-japanese", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Sailor-japanese" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16046866757979938 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2912583602962783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3911770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11643949468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json b/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json new file mode 100644 index 000000000..7945fdd32 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-1.5B/1762652579.8647912", + "retrieved_timestamp": "1762652579.8647912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saka-1.5B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saka-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726266306732802 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3987868899865206 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24152260638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json b/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json new file mode 100644 index 000000000..c5cd6c135 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-14B/1762652579.8649821", + "retrieved_timestamp": "1762652579.864983", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saka-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saka-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7174341857382855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6496945295195891 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48859375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539561170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json b/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json new file mode 100644 index 000000000..a67f288ee --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-24B/1762652579.865175", + "retrieved_timestamp": "1762652579.865176", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saka-24B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saka-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38186123928952953 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6072116494463233 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45408333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4765625 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json b/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json new file mode 100644 index 000000000..af1b8f4d1 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.2B/1762652579.86556", + "retrieved_timestamp": "1762652579.865563", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saka-7.2B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saka-7.2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1544989516704566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945156585364917 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37105208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11602393617021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.292 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json b/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json new file mode 100644 index 000000000..5f9be49a1 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.6B/1762652579.865891", + "retrieved_timestamp": "1762652579.8658922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Saka-7.6B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Saka-7.6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45242844541372446 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5655284792075981 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45403922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json b/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json new file mode 100644 index 000000000..63079d6d2 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SakaMoe-3x1.6B-Instruct/1762652579.866188", + "retrieved_timestamp": "1762652579.8661902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SakaMoe-3x1.6B-Instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SakaMoe-3x1.6B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23708094522533543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328247997224552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18824800531914893 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 1.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json new file mode 100644 index 000000000..3eb02591a --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Alpha/1762652579.866478", + "retrieved_timestamp": "1762652579.8664792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SakalFusion-7B-Alpha", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SakalFusion-7B-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5289653674472622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.559133672829116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4581458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4473902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json new file mode 100644 index 000000000..5687503db --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Beta/1762652579.866734", + "retrieved_timestamp": "1762652579.8667352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/SakalFusion-7B-Beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/SakalFusion-7B-Beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18090222830977362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2881298650933641 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10895944148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json b/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json new file mode 100644 index 000000000..7973978ee --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Tara-3.8B-v1.1/1762652579.866961", + "retrieved_timestamp": "1762652579.866962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Tara-3.8B-v1.1", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/Tara-3.8B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40621661635571393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4885743296577029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4779583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json b/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json new file mode 100644 index 000000000..c894a3a74 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_light-1.1-3B/1762652579.867201", + "retrieved_timestamp": "1762652579.867202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/light-1.1-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/light-1.1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27345110972220377 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28027723572953045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2617449664429531 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12092752659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json b/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json new file mode 100644 index 000000000..1fa7f3cc1 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_light-3B/1762652579.8674219", + "retrieved_timestamp": "1762652579.867423", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/light-3B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/light-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5337360425892188 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4831034368803701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2590634441087613 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40149999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3774933510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json b/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json new file mode 100644 index 000000000..f77fe325e --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_light-3b-beta/1762652579.867648", + "retrieved_timestamp": "1762652579.867649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/light-3b-beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/light-3b-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5485489612007252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48152297262112204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40146875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json b/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json new file mode 100644 index 000000000..fd74e4de5 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_light-7b-beta/1762652579.867865", + "retrieved_timestamp": "1762652579.867866", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/light-7b-beta", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/light-7b-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6233870574520051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5548193064288276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42906249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445561835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json b/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json new file mode 100644 index 000000000..51a2babb3 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_llama-3-yanyuedao-8b-instruct/1762652579.8681011", + "retrieved_timestamp": "1762652579.8681011", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/llama-3-yanyuedao-8b-instruct", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/llama-3-yanyuedao-8b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21857116894284942 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43497849055247495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41985416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29105718085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json b/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json new file mode 100644 index 000000000..a73d9cc96 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_magro-7B/1762652579.868387", + "retrieved_timestamp": "1762652579.8683882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/magro-7B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/magro-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13439008497453425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185526485966236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44598958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2765126329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json b/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json new file mode 100644 index 000000000..003d6d32f --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-01/1762652579.868608", + "retrieved_timestamp": "1762652579.868609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/mergekit-01", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/mergekit-01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6233870574520051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5548193064288276 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42906249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445561835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json b/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json new file mode 100644 index 000000000..b41889500 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-della_linear-vmeykci/1762652579.868854", + "retrieved_timestamp": "1762652579.868856", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/mergekit-della_linear-vmeykci", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/mergekit-della_linear-vmeykci" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1126078804239418 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28155028620092587 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38968749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10887632978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json b/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json new file mode 100644 index 000000000..e427e1d1b --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_model-3/1762652579.869146", + "retrieved_timestamp": "1762652579.869148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/model-3", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/model-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6263846593704703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554216994021922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37084592145015105 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4454787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json b/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json new file mode 100644 index 000000000..a1fb0e1e4 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_tara-3.8B/1762652579.86961", + "retrieved_timestamp": "1762652579.869611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/tara-3.8B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/tara-3.8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077403511571519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4885743296577029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4779583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json new file mode 100644 index 000000000..97366d5db --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.2/1762652579.870035", + "retrieved_timestamp": "1762652579.870036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-14B-v0.2", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-14B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7069930007934502 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6472012505703305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3995468277945619 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4793541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5387300531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json new file mode 100644 index 000000000..b88384d5d --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.3/1762652579.870242", + "retrieved_timestamp": "1762652579.870243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-14B-v0.3", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-14B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7040452665593957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.639820771660141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47541666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5336602393617021 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json new file mode 100644 index 000000000..9de242835 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.4/1762652579.8704672", + "retrieved_timestamp": "1762652579.8704839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-14B-v0.4", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-14B-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3008284684636764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6420007859105136 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4885625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527842420212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json b/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json new file mode 100644 index 000000000..3d84876d1 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B/1762652579.869824", + "retrieved_timestamp": "1762652579.8698251", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-14B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5700563394016764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6491153472177067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4717604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5380651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json b/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json new file mode 100644 index 000000000..c9cdb3fcd --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-32B/1762652579.870782", + "retrieved_timestamp": "1762652579.870784", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-32B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6854357549080883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7037285782797875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4994791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5910073138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json b/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json new file mode 100644 index 000000000..affa15c00 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B-v1.5/1762652579.8712351", + "retrieved_timestamp": "1762652579.8712351", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-72B-v1.5", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-72B-v1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6549610588793291 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7391727188223717 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395770392749245 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41359060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46909375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6053856382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json b/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json new file mode 100644 index 000000000..518d2e5a4 --- /dev/null +++ b/data/hfopenllm_v2/Sakalti/ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B/1762652579.8710139", + "retrieved_timestamp": "1762652579.8710148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/ultiima-72B", + "developer": "Sakalti", + "inference_platform": "unknown", + "id": "Sakalti/ultiima-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7140121544169471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7217809739144654 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5354984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41442953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46518750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.590591755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json b/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json new file mode 100644 index 000000000..ca2ab5dc0 --- /dev/null +++ b/data/hfopenllm_v2/Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1762652579.8714519", + "retrieved_timestamp": "1762652579.8714519", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", + "developer": "Salesforce", + "inference_platform": "unknown", + "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38156203318306536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5011950469666927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36333333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172373670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json b/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json new file mode 100644 index 000000000..90fa8e33c --- /dev/null +++ b/data/hfopenllm_v2/SanjiWatsuki/Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Kunoichi-DPO-v2-7B/1762652579.871708", + "retrieved_timestamp": "1762652579.871708", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SanjiWatsuki/Kunoichi-DPO-v2-7B", + "developer": "SanjiWatsuki", + "inference_platform": "unknown", + "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431034100630772 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4415592450869275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41883333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3106715425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json b/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json new file mode 100644 index 000000000..2a4979004 --- /dev/null +++ b/data/hfopenllm_v2/SanjiWatsuki/Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Silicon-Maid-7B/1762652579.87197", + "retrieved_timestamp": "1762652579.8719711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SanjiWatsuki/Silicon-Maid-7B", + "developer": "SanjiWatsuki", + "inference_platform": "unknown", + "id": "SanjiWatsuki/Silicon-Maid-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367835121920947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4127972831009074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41883333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308344414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json b/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json new file mode 100644 index 000000000..a80e9f8d5 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_70B-L3.3-Cirrus-x1/1762652579.8721752", + "retrieved_timestamp": "1762652579.8721762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/70B-L3.3-Cirrus-x1", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/70B-L3.3-Cirrus-x1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6680751517085777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7028970787833794 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37386706948640486 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44966442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4841666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json b/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json new file mode 100644 index 000000000..dd07ff86e --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_Fimbulvetr-11B-v2/1762652579.872427", + "retrieved_timestamp": "1762652579.872428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/Fimbulvetr-11B-v2", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/Fimbulvetr-11B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5100056738343152 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4544495065184342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43536458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33011968085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json new file mode 100644 index 000000000..e1507b172 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872639", + "retrieved_timestamp": "1762652579.87264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-70B-Euryale-v2.1", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-70B-Euryale-v2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7384417789243651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6471322811268715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42091666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103889627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json new file mode 100644 index 000000000..a895d5c8c --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872864", + "retrieved_timestamp": "1762652579.872865", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-70B-Euryale-v2.1", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-70B-Euryale-v2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7281003293483512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6502778992745041 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22432024169184292 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41958333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5095578457446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json b/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json new file mode 100644 index 000000000..5fc29ffb5 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Lunaris-v1/1762652579.8733618", + "retrieved_timestamp": "1762652579.873365", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-8B-Lunaris-v1", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-8B-Lunaris-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6894573066131198 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235299282515419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3726666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3787400265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json b/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json new file mode 100644 index 000000000..48f7be488 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Niitama-v1/1762652579.8737721", + "retrieved_timestamp": "1762652579.873773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-8B-Niitama-v1", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-8B-Niitama-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6790659893526954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302980131787137 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3700964095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json new file mode 100644 index 000000000..2ea2067f9 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.2/1762652579.8740559", + "retrieved_timestamp": "1762652579.874058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-8B-Stheno-v3.2", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-8B-Stheno-v3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6872841837435781 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.522778637171633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json new file mode 100644 index 000000000..1a9774c9b --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.3-32K/1762652579.874314", + "retrieved_timestamp": "1762652579.874315", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/L3-8B-Stheno-v3.3-32K", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/L3-8B-Stheno-v3.3-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46037181345496614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3844012923008206 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3725416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1895777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json b/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json new file mode 100644 index 000000000..abcea9e76 --- /dev/null +++ b/data/hfopenllm_v2/Sao10K/MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sao10K_MN-12B-Lyra-v3/1762652579.874634", + "retrieved_timestamp": "1762652579.874634", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sao10K/MN-12B-Lyra-v3", + "developer": "Sao10K", + "inference_platform": "unknown", + "id": "Sao10K/MN-12B-Lyra-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486063644463357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4803954360397243 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40190624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32488364361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json new file mode 100644 index 000000000..74486f516 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/1762652579.874861", + "retrieved_timestamp": "1762652579.8748622", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7971681804279312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7000545067146033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5792885638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json new file mode 100644 index 000000000..4ea1f796e --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/1762652579.875268", + "retrieved_timestamp": "1762652579.8752692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7956444456264933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7023193256341814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41663541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5719747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json new file mode 100644 index 000000000..271439303 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/1762652579.875521", + "retrieved_timestamp": "1762652579.8755221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8248702332034556 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6913199237437709 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42745833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.56640625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json new file mode 100644 index 000000000..7898ccfa6 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/1762652579.87576", + "retrieved_timestamp": "1762652579.8757608", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7630963620970137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6920204096666581 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4642604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5752160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json new file mode 100644 index 000000000..c3bd55541 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/1762652579.876068", + "retrieved_timestamp": "1762652579.876069", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7515558717536137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6928650089977083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5460725075528701 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47086458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5762134308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json new file mode 100644 index 000000000..968005636 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/1762652579.87637", + "retrieved_timestamp": "1762652579.876371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8208985491828349 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6889783858832969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42742708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5672373670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.76 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json new file mode 100644 index 000000000..167e6457e --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/1762652579.876656", + "retrieved_timestamp": "1762652579.876657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8145786513118525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6463223196116569 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802114803625378 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4139375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45985704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json new file mode 100644 index 000000000..854f40a92 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/1762652579.876898", + "retrieved_timestamp": "1762652579.876899", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.81420408959339 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6403963618749583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44667708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4523769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json new file mode 100644 index 000000000..28e55bd6c --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/1762652579.877154", + "retrieved_timestamp": "1762652579.877155", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6766679078179231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5625539568927603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3907708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871343085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json new file mode 100644 index 000000000..f9152b4a8 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/1762652579.87745", + "retrieved_timestamp": "1762652579.877451", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7767601076255447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6518345685119445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719033232628399 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3598993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47913541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4646775265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json new file mode 100644 index 000000000..299ea70e5 --- /dev/null +++ b/data/hfopenllm_v2/Saxo/Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Superb-27B/1762652579.877677", + "retrieved_timestamp": "1762652579.8776782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Saxo/Linkbricks-Horizon-AI-Superb-27B", + "developer": "Saxo", + "inference_platform": "unknown", + "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7302235845334822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6186245528925046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22205438066465258 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.406000664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json new file mode 100644 index 000000000..981a8a267 --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2.5/1762652579.878138", + "retrieved_timestamp": "1762652579.8781388", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLMs/SeaLLM-7B-v2.5", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "id": "SeaLLMs/SeaLLM-7B-v2.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4521536190640833 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49802029594352754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42032291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3203125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json new file mode 100644 index 000000000..e930772fe --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2/1762652579.877889", + "retrieved_timestamp": "1762652579.877889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLMs/SeaLLM-7B-v2", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "id": "SeaLLMs/SeaLLM-7B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36712367629002157 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4902100795458318 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4069583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30826130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.376 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json b/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json new file mode 100644 index 000000000..88b6f08c8 --- /dev/null +++ b/data/hfopenllm_v2/SeaLLMs/SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLMs-v3-7B-Chat/1762652579.8783438", + "retrieved_timestamp": "1762652579.878345", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeaLLMs/SeaLLMs-v3-7B-Chat", + "developer": "SeaLLMs", + "inference_platform": "unknown", + "id": "SeaLLMs/SeaLLMs-v3-7B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43766539448662883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5266406284595359 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3894614361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json new file mode 100644 index 000000000..37070d00b --- /dev/null +++ b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-CL-34B/1762652579.8785448", + "retrieved_timestamp": "1762652579.878546", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SenseLLM/ReflectionCoder-CL-34B", + "developer": "SenseLLM", + "inference_platform": "unknown", + "id": "SenseLLM/ReflectionCoder-CL-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4007710652180658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39529304297033296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41548958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14237034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.744 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json new file mode 100644 index 000000000..407b904df --- /dev/null +++ b/data/hfopenllm_v2/SenseLLM/ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-DS-33B/1762652579.878793", + "retrieved_timestamp": "1762652579.878794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SenseLLM/ReflectionCoder-DS-33B", + "developer": "SenseLLM", + "inference_platform": "unknown", + "id": "SenseLLM/ReflectionCoder-DS-33B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3786641666334215 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3449447540164568 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3343125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12017952127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.34 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json b/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json new file mode 100644 index 000000000..6a6f30331 --- /dev/null +++ b/data/hfopenllm_v2/SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/1762652579.879464", + "retrieved_timestamp": "1762652579.8794649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", + "developer": "SeppeV", + "inference_platform": "unknown", + "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09554648333089535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3072665948660797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40320833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11610704787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json b/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json new file mode 100644 index 000000000..7bd9dda27 --- /dev/null +++ b/data/hfopenllm_v2/Sharathhebbar24/SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sharathhebbar24_SSH_355M/1762652579.8797262", + "retrieved_timestamp": "1762652579.8797271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sharathhebbar24/SSH_355M", + "developer": "Sharathhebbar24", + "inference_platform": "unknown", + "id": "Sharathhebbar24/SSH_355M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1423589409433636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30985907344593705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11760305851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.355 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json b/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json new file mode 100644 index 000000000..f384db25b --- /dev/null +++ b/data/hfopenllm_v2/Shreyash2010/Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Shreyash2010_Uma-4x4B-Instruct-v0.1/1762652579.880244", + "retrieved_timestamp": "1762652579.880245", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Shreyash2010/Uma-4x4B-Instruct-v0.1", + "developer": "Shreyash2010", + "inference_platform": "unknown", + "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516961661724225 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5511602059856503 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4441041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.386968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json b/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json new file mode 100644 index 000000000..b830d4d2b --- /dev/null +++ b/data/hfopenllm_v2/Sicarius-Prototyping/Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Micropenis_1B/1762652579.8808", + "retrieved_timestamp": "1762652579.880801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sicarius-Prototyping/Micropenis_1B", + "developer": "Sicarius-Prototyping", + "inference_platform": "unknown", + "id": "Sicarius-Prototyping/Micropenis_1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460662154195313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3372377910880025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3325416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18600398936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.618 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json b/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json new file mode 100644 index 000000000..82d0c6e35 --- /dev/null +++ b/data/hfopenllm_v2/Sicarius-Prototyping/bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_bacon_and_food/1762652579.881054", + "retrieved_timestamp": "1762652579.881054", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sicarius-Prototyping/bacon_and_food", + "developer": "Sicarius-Prototyping", + "inference_platform": "unknown", + "id": "Sicarius-Prototyping/bacon_and_food" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5860428108529812 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47245798883729967 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3883854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3262965425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json new file mode 100644 index 000000000..93074dbe1 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B-ad/1762652579.88126", + "retrieved_timestamp": "1762652579.881261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/2B-ad", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/2B-ad" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4378903531518593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40922431523996955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40153124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2662067819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.204 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json b/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json new file mode 100644 index 000000000..1f77cfec6 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B_or_not_2B/1762652579.881506", + "retrieved_timestamp": "1762652579.881506", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/2B_or_not_2B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/2B_or_not_2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2062316874781136 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3415917024092019 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13987699468085107 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json b/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json new file mode 100644 index 000000000..8b6937a6f --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Dusk_Rainbow/1762652579.881711", + "retrieved_timestamp": "1762652579.8817122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Dusk_Rainbow", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Dusk_Rainbow" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3588057465303173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47717504280736184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40252083333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3443317819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json b/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json new file mode 100644 index 000000000..b931ebf82 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Eximius_Persona_5B/1762652579.881908", + "retrieved_timestamp": "1762652579.881909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Eximius_Persona_5B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Eximius_Persona_5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6559850086658954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4511736018571028 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38181249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31399601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json new file mode 100644 index 000000000..d20846648 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_Mind_8B/1762652579.8823712", + "retrieved_timestamp": "1762652579.8823712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Impish_Mind_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Impish_Mind_8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31791424531354584 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46736571616627115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4069583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3308676861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json b/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json new file mode 100644 index 000000000..a06b7f3e4 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Redemption_Wind_24B/1762652579.8843782", + "retrieved_timestamp": "1762652579.884379", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Redemption_Wind_24B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Redemption_Wind_24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25014517037017336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.642816406969129 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543218085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json b/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json new file mode 100644 index 000000000..e6afc71c9 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Winged_Imp_8B/1762652579.8845959", + "retrieved_timestamp": "1762652579.884597", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Winged_Imp_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Winged_Imp_8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.743012983328679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5120376322048542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41483333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3638630319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json b/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json new file mode 100644 index 000000000..5c5fa33b2 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Wingless_Imp_8B/1762652579.8848069", + "retrieved_timestamp": "1762652579.8848078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Wingless_Imp_8B", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Wingless_Imp_8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.743012983328679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5120376322048542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41483333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3638630319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json b/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json new file mode 100644 index 000000000..c60e4aa61 --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Zion_Alpha/1762652579.885025", + "retrieved_timestamp": "1762652579.885026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Zion_Alpha", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Zion_Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3324024698910003 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49321099934509743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4726875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31316489361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json b/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json new file mode 100644 index 000000000..8d1843bfb --- /dev/null +++ b/data/hfopenllm_v2/SicariusSicariiStuff/dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_dn_ep02/1762652579.885246", + "retrieved_timestamp": "1762652579.885247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/dn_ep02", + "developer": "SicariusSicariiStuff", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/dn_ep02" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064340394597445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5266008759836228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1419939577039275 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43163541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39976728723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json new file mode 100644 index 000000000..c224bff81 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/1762652579.887695", + "retrieved_timestamp": "1762652579.8876958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3534100630770799 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4264821228336018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40236458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28116688829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json new file mode 100644 index 000000000..5c393fbac --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/1762652579.888021", + "retrieved_timestamp": "1762652579.888022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38188672721711725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077962006048589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44360416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3913730053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json new file mode 100644 index 000000000..1fc7e1e52 --- /dev/null +++ b/data/hfopenllm_v2/SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/1762652579.8882601", + "retrieved_timestamp": "1762652579.888261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", + "developer": "SkyOrbis", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812373391490135 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5389864554242366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4237916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42378656914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json b/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json new file mode 100644 index 000000000..6a47de4bb --- /dev/null +++ b/data/hfopenllm_v2/Solshine/Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Solshine_Brimful-merged-replete/1762652579.8890932", + "retrieved_timestamp": "1762652579.8890939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Solshine/Brimful-merged-replete", + "developer": "Solshine", + "inference_platform": "unknown", + "id": "Solshine/Brimful-merged-replete" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17605619755581856 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28834447696551024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10846077127659574 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 12.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json new file mode 100644 index 000000000..22ae550c0 --- /dev/null +++ b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-8b-Sify/1762652579.89035", + "retrieved_timestamp": "1762652579.890351", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sourjayon/DeepSeek-R1-8b-Sify", + "developer": "Sourjayon", + "inference_platform": "unknown", + "id": "Sourjayon/DeepSeek-R1-8b-Sify" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679481553389451 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33793580116642347 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3303125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19805518617021275 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json new file mode 100644 index 000000000..74cf27e5d --- /dev/null +++ b/data/hfopenllm_v2/Sourjayon/DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-ForumNXT/1762652579.890614", + "retrieved_timestamp": "1762652579.890615", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sourjayon/DeepSeek-R1-ForumNXT", + "developer": "Sourjayon", + "inference_platform": "unknown", + "id": "Sourjayon/DeepSeek-R1-ForumNXT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26028714920854445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3310198487331462 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25755287009063443 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3392395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16481050531914893 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json b/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json new file mode 100644 index 000000000..f03de3e07 --- /dev/null +++ b/data/hfopenllm_v2/SpaceYL/ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SpaceYL_ECE_Poirot/1762652579.890822", + "retrieved_timestamp": "1762652579.890822", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SpaceYL/ECE_Poirot", + "developer": "SpaceYL", + "inference_platform": "unknown", + "id": "SpaceYL/ECE_Poirot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3106956209524063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42622349736626014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40264583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2883144946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json b/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json new file mode 100644 index 000000000..306d6037c --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Spestly_Athena-1-3B/1762652579.8910668", + "retrieved_timestamp": "1762652579.891068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Spestly/Athena-1-3B", + "developer": "Spestly", + "inference_platform": "unknown", + "id": "Spestly/Athena-1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5569167586448401 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47015477265388084 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23791540785498488 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43622916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35189494680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json new file mode 100644 index 000000000..b65bf5e42 --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-1.5B-Preview/1762652579.891309", + "retrieved_timestamp": "1762652579.89131", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Spestly/Atlas-Pro-1.5B-Preview", + "developer": "Spestly", + "inference_platform": "unknown", + "id": "Spestly/Atlas-Pro-1.5B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2429509257658568 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.349893585329524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31948640483383683 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3354270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1924867021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json b/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json new file mode 100644 index 000000000..55a5565d0 --- /dev/null +++ b/data/hfopenllm_v2/Spestly/Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-7B-Preview/1762652579.891519", + "retrieved_timestamp": "1762652579.89152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Spestly/Atlas-Pro-7B-Preview", + "developer": "Spestly", + "inference_platform": "unknown", + "id": "Spestly/Atlas-Pro-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31541642840995227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46679203304308553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5083081570996979 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2970412234042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json b/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json new file mode 100644 index 000000000..ef3a16d51 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Stark2008_GutenLaserPi/1762652579.891723", + "retrieved_timestamp": "1762652579.891723", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Stark2008/GutenLaserPi", + "developer": "Stark2008", + "inference_platform": "unknown", + "id": "Stark2008/GutenLaserPi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42265300513747966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5212342482489518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31058843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json b/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json new file mode 100644 index 000000000..eea5e7327 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Stark2008_LayleleFlamPi/1762652579.8919628", + "retrieved_timestamp": "1762652579.891964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Stark2008/LayleleFlamPi", + "developer": "Stark2008", + "inference_platform": "unknown", + "id": "Stark2008/LayleleFlamPi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42842325030917966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5115654142581095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46084375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3093417553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json b/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json new file mode 100644 index 000000000..110ddf110 --- /dev/null +++ b/data/hfopenllm_v2/Stark2008/VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Stark2008_VisFlamCat/1762652579.892166", + "retrieved_timestamp": "1762652579.892166", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Stark2008/VisFlamCat", + "developer": "Stark2008", + "inference_platform": "unknown", + "id": "Stark2008/VisFlamCat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43659157701565177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5216957865099948 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44627083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31441156914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json b/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json new file mode 100644 index 000000000..9ed5f2581 --- /dev/null +++ b/data/hfopenllm_v2/Steelskull/L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-MS-Nevoria-70b/1762652579.8924139", + "retrieved_timestamp": "1762652579.892415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Steelskull/L3.3-MS-Nevoria-70b", + "developer": "Steelskull", + "inference_platform": "unknown", + "id": "Steelskull/L3.3-MS-Nevoria-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6963268571833845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6997536580025828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47063758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4682291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5535239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json b/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json new file mode 100644 index 000000000..ea033a79a --- /dev/null +++ b/data/hfopenllm_v2/Steelskull/L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-Nevoria-R1-70b/1762652579.892649", + "retrieved_timestamp": "1762652579.89265", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Steelskull/L3.3-Nevoria-R1-70b", + "developer": "Steelskull", + "inference_platform": "unknown", + "id": "Steelskull/L3.3-Nevoria-R1-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6023794642659255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6971668662651651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46895973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5462932180851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json b/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json new file mode 100644 index 000000000..a66bd76cd --- /dev/null +++ b/data/hfopenllm_v2/StelleX/Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/StelleX_Vorisatex-7B-preview/1762652579.893095", + "retrieved_timestamp": "1762652579.893096", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StelleX/Vorisatex-7B-preview", + "developer": "StelleX", + "inference_platform": "unknown", + "id": "StelleX/Vorisatex-7B-preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1515013497519914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3111695757290421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41923958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json new file mode 100644 index 000000000..83c7ff8de --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Instruct/1762652579.893334", + "retrieved_timestamp": "1762652579.893334", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SultanR/SmolTulu-1.7b-Instruct", + "developer": "SultanR", + "inference_platform": "unknown", + "id": "SultanR/SmolTulu-1.7b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6540867121459949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713086260572204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17104388297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json new file mode 100644 index 000000000..b559221a0 --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Reinforced/1762652579.893585", + "retrieved_timestamp": "1762652579.893586", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SultanR/SmolTulu-1.7b-Reinforced", + "developer": "SultanR", + "inference_platform": "unknown", + "id": "SultanR/SmolTulu-1.7b-Reinforced" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6790659893526954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3551868188444029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17627992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json new file mode 100644 index 000000000..f60ab4a0e --- /dev/null +++ b/data/hfopenllm_v2/SultanR/SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-it-v0/1762652579.8938031", + "retrieved_timestamp": "1762652579.8938031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SultanR/SmolTulu-1.7b-it-v0", + "developer": "SultanR", + "inference_platform": "unknown", + "id": "SultanR/SmolTulu-1.7b-it-v0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6540867121459949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713086260572204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17104388297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json b/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json new file mode 100644 index 000000000..e276ae9a6 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBA-123/1762652579.894015", + "retrieved_timestamp": "1762652579.894016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBA-123", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBA-123" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2079548930171944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2920111436321769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34990625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json b/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json new file mode 100644 index 000000000..5d13813d0 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBA99/1762652579.8942661", + "retrieved_timestamp": "1762652579.8942661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBA99", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBA99" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14066011516110588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2768958340020912 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11120345744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json b/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json new file mode 100644 index 000000000..59abdbd1d --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAIK29/1762652579.89447", + "retrieved_timestamp": "1762652579.894471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAIK29", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAIK29" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45884807865352817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5589641249478369 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3678247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45008333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json b/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json new file mode 100644 index 000000000..2fa4d38cd --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_250_Xia0_gZ/1762652579.894933", + "retrieved_timestamp": "1762652579.894933", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_250_Xia0_gZ", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAI_250_Xia0_gZ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4685401401614383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5567682997527722 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640483383685801 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4579270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4464760638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json b/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json new file mode 100644 index 000000000..aad977057 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_275_Tsunami_gZ/1762652579.895135", + "retrieved_timestamp": "1762652579.895135", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_275_Tsunami_gZ", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAI_275_Tsunami_gZ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369586031729146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5531259476127334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3285498489425982 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44478124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json b/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json new file mode 100644 index 000000000..3baa32beb --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_525_Tsu_gZ_Xia0/1762652579.8953412", + "retrieved_timestamp": "1762652579.8953412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_525_Tsu_gZ_Xia0", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAI_525_Tsu_gZ_Xia0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338612658856279 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5561933633430705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3429003021148036 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44744791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44772273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json b/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json new file mode 100644 index 000000000..68c62d73a --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_78B_Calme_3_1_Ties/1762652579.895541", + "retrieved_timestamp": "1762652579.895541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_78B_Calme_3_1_Ties", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAI_78B_Calme_3_1_Ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18280052482967415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28281264175951776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22902684563758388 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30996874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11436170212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 27.06 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json b/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json new file mode 100644 index 000000000..7fd74fb81 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/1762652579.895749", + "retrieved_timestamp": "1762652579.8957498", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18145188100905596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22972580681005383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23154362416107382 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3445416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11602393617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.366 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json b/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json new file mode 100644 index 000000000..40c0d35df --- /dev/null +++ b/data/hfopenllm_v2/Supichi/HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_HF_TOKEN/1762652579.895958", + "retrieved_timestamp": "1762652579.895958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/HF_TOKEN", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/HF_TOKEN" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1379872072766925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2763924734767205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32717708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11095412234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 17.161 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json b/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json new file mode 100644 index 000000000..9f4e574f6 --- /dev/null +++ b/data/hfopenllm_v2/Supichi/NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_NJS26/1762652579.8961651", + "retrieved_timestamp": "1762652579.8961651", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/NJS26", + "developer": "Supichi", + "inference_platform": "unknown", + "id": "Supichi/NJS26" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04481331755298164 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4780152929488641 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036901595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json new file mode 100644 index 000000000..f8e837b58 --- /dev/null +++ b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.0/1762652579.896373", + "retrieved_timestamp": "1762652579.896374", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Svak/MN-12B-Inferor-v0.0", + "developer": "Svak", + "inference_platform": "unknown", + "id": "Svak/MN-12B-Inferor-v0.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5707555951541909 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195010930589931 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46388541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3558843085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json new file mode 100644 index 000000000..17d95df79 --- /dev/null +++ b/data/hfopenllm_v2/Svak/MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.1/1762652579.8966348", + "retrieved_timestamp": "1762652579.896636", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Svak/MN-12B-Inferor-v0.1", + "developer": "Svak", + "inference_platform": "unknown", + "id": "Svak/MN-12B-Inferor-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6346527214457639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5146762089838804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3661901595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json b/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json new file mode 100644 index 000000000..689bfdbd0 --- /dev/null +++ b/data/hfopenllm_v2/Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/1762652579.896852", + "retrieved_timestamp": "1762652579.896853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", + "developer": "Syed-Hasan-8503", + "inference_platform": "unknown", + "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5714049832222946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5681534123661078 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15709969788519637 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3963541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38605385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json new file mode 100644 index 000000000..2e6246333 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P1/1762652579.897121", + "retrieved_timestamp": "1762652579.8971221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V1-P1", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V1-P1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7849783020164276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.508544756293663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3881041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3759973404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json new file mode 100644 index 000000000..87d83ff5a --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P2/1762652579.897378", + "retrieved_timestamp": "1762652579.8973792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V1-P2", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V1-P2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6724213974476612 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47717566218002166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3453291223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json new file mode 100644 index 000000000..4d0af8ffd --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P3/1762652579.897578", + "retrieved_timestamp": "1762652579.897579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V1-P3", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V1-P3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7137373280673058 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5127875870036823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34050864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json b/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json new file mode 100644 index 000000000..446292f96 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V2/1762652579.897814", + "retrieved_timestamp": "1762652579.897815", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V2", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5180243974875552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513268555595521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38286458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3737533244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json b/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json new file mode 100644 index 000000000..4aa1a5c6c --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V3/1762652579.8980181", + "retrieved_timestamp": "1762652579.898019", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V3", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5474751437297483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.511865544689898 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3922291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3738364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json b/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json new file mode 100644 index 000000000..58b32f6dd --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V4/1762652579.898447", + "retrieved_timestamp": "1762652579.898448", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V4", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889499860370484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092470034846742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38295833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37857380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json b/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json new file mode 100644 index 000000000..81073668e --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V5/1762652579.8986921", + "retrieved_timestamp": "1762652579.898693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V5", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405058577906621 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088651598969166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2688821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40546875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37591422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json b/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json new file mode 100644 index 000000000..78f19fc80 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V6/1762652579.898935", + "retrieved_timestamp": "1762652579.898936", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V6", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7022467054083166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033606149499412 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41210416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3501496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json b/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json new file mode 100644 index 000000000..aa2d8d0aa --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V7/1762652579.899169", + "retrieved_timestamp": "1762652579.8991702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V7", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3529102780622083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4526219443939161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36711458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2696974734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json b/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json new file mode 100644 index 000000000..76cf489b0 --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V8/1762652579.899387", + "retrieved_timestamp": "1762652579.8993878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V8", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7770349339751859 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5094406613555632 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37824135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json b/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json new file mode 100644 index 000000000..7dd5a10bf --- /dev/null +++ b/data/hfopenllm_v2/T145/KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V9/1762652579.8996658", + "retrieved_timestamp": "1762652579.899667", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/KRONOS-8B-V9", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/KRONOS-8B-V9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7855778224001206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5099211908307056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1986404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3751662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json b/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json new file mode 100644 index 000000000..9f324330a --- /dev/null +++ b/data/hfopenllm_v2/T145/Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Instruct-Zeus/1762652579.899903", + "retrieved_timestamp": "1762652579.899904", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/Llama-3.1-8B-Instruct-Zeus", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/Llama-3.1-8B-Instruct-Zeus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7941207108250552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173982439996302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39762499999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38929521276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json b/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json new file mode 100644 index 000000000..ac8d9e822 --- /dev/null +++ b/data/hfopenllm_v2/T145/Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_Meta-Llama-3.1-8B-Instruct-TIES/1762652579.900369", + "retrieved_timestamp": "1762652579.900369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/Meta-Llama-3.1-8B-Instruct-TIES", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5423542866261519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070111385564763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37799202127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json b/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json new file mode 100644 index 000000000..ce6cae110 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V10/1762652579.900583", + "retrieved_timestamp": "1762652579.900584", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V10", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7706651684197928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5269758270442659 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38978124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390375664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json b/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json new file mode 100644 index 000000000..d416e9837 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V11/1762652579.900793", + "retrieved_timestamp": "1762652579.900793", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V11", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8099575792231279 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5161982586505715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38066666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38838098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json b/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json new file mode 100644 index 000000000..e4c539500 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V12/1762652579.901004", + "retrieved_timestamp": "1762652579.901004", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V12", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V12" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.781556270695089 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5253912026310238 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38584375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3912067819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json new file mode 100644 index 000000000..33c8066c7 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13-abliterated/1762652579.901429", + "retrieved_timestamp": "1762652579.9014301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V13-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V13-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7877509452696623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5197597316957202 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17900302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38721742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json new file mode 100644 index 000000000..524a1eccf --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13/1762652579.90121", + "retrieved_timestamp": "1762652579.9012108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V13", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V13" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7904238531540756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5277128851736589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38447916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39112367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json b/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json new file mode 100644 index 000000000..030e9224d --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V14/1762652579.901653", + "retrieved_timestamp": "1762652579.901653", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V14", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.770939994769434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274593322517976 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3844479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3913730053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json b/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json new file mode 100644 index 000000000..08d4edf51 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V15/1762652579.901858", + "retrieved_timestamp": "1762652579.901859", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V15", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V15" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.701272623306161 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5537552380544757 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40199999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40591755319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json b/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json new file mode 100644 index 000000000..e7b1e0840 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V16/1762652579.9020631", + "retrieved_timestamp": "1762652579.902064", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V16", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7925471083392066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265817990313368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39261968085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json new file mode 100644 index 000000000..e9165158f --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V2/1762652579.902674", + "retrieved_timestamp": "1762652579.902674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V17-abliterated-V2", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V17-abliterated-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6532123654126606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49280119619174295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3407291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34017619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json new file mode 100644 index 000000000..1d2a32fdd --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V4/1762652579.902891", + "retrieved_timestamp": "1762652579.902891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V17-abliterated-V4", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V17-abliterated-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7228298691915229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169216944225185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37741023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json new file mode 100644 index 000000000..670e8d392 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated/1762652579.902467", + "retrieved_timestamp": "1762652579.9024682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V17-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V17-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7576009432749549 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520041374505222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42692708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36220079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.594 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json b/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json new file mode 100644 index 000000000..09a95a6d3 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17/1762652579.902262", + "retrieved_timestamp": "1762652579.902263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V17", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V17" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7940708431406447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.525086643033107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40162499999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39345079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json b/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json new file mode 100644 index 000000000..e90f76e73 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V18/1762652579.903114", + "retrieved_timestamp": "1762652579.903115", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V18", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7834046995305788 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5269802862530547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40429166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39419880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json b/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json new file mode 100644 index 000000000..b2d34b30a --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V19/1762652579.903361", + "retrieved_timestamp": "1762652579.903362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V19", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V19" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7882507302845339 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5276233222408697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40429166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933676861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json new file mode 100644 index 000000000..fe74092e2 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-ORPO/1762652579.903775", + "retrieved_timestamp": "1762652579.903776", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V2-ORPO", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V2-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7186830941900824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5075246906772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39349999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3677692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json new file mode 100644 index 000000000..6a709041d --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-abliterated/1762652579.9039848", + "retrieved_timestamp": "1762652579.903986", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V2-abliterated", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V2-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7895495064207414 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128868622210663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json new file mode 100644 index 000000000..8bb5d4874 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2/1762652579.9035678", + "retrieved_timestamp": "1762652579.903569", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V2", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8029384255996312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194405455747161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21601208459214502 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3896276595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json b/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json new file mode 100644 index 000000000..7e5f033d3 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V20/1762652579.904202", + "retrieved_timestamp": "1762652579.904203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V20", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7955945779420825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244005058415827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40432291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3929521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json b/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json new file mode 100644 index 000000000..ea4c316ea --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V21/1762652579.904516", + "retrieved_timestamp": "1762652579.904516", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V21", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V21" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3785145635801894 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33975753940458464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17137632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json b/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json new file mode 100644 index 000000000..665c3135d --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V22/1762652579.9047282", + "retrieved_timestamp": "1762652579.9047291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V22", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V22" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7995163942782927 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244915522507715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3989583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3937832446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json b/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json new file mode 100644 index 000000000..568605e49 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V23/1762652579.904932", + "retrieved_timestamp": "1762652579.9049332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V23", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V23" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7621222799948582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519500470668349 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3666057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json b/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json new file mode 100644 index 000000000..7e3d0d6ad --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V24/1762652579.905136", + "retrieved_timestamp": "1762652579.9051368", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V24", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5999813827311533 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4777962576721959 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3729166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32845744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json b/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json new file mode 100644 index 000000000..9e0cbcdb6 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V25/1762652579.905337", + "retrieved_timestamp": "1762652579.905338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V25", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33202790817253774 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4546907005207668 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3488229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2884807180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json b/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json new file mode 100644 index 000000000..452099442 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V26/1762652579.905539", + "retrieved_timestamp": "1762652579.905539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V26", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V26" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6707979272774018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5231548583920674 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40162499999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39070811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json b/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json new file mode 100644 index 000000000..a45533efd --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V27/1762652579.9057322", + "retrieved_timestamp": "1762652579.905733", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V27", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V27" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.654361538495636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.52303129292911 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39768749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3902094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json b/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json new file mode 100644 index 000000000..da0056098 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V28/1762652579.905931", + "retrieved_timestamp": "1762652579.905931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V28", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V28" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.635252241829457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5254256199968339 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38962499999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3902094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json b/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json new file mode 100644 index 000000000..485a9b612 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V29/1762652579.906123", + "retrieved_timestamp": "1762652579.906123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V29", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V29" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7417640748768822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5253330901112457 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4002604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json new file mode 100644 index 000000000..1d938f1e6 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L1/1762652579.906316", + "retrieved_timestamp": "1762652579.906317", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V2L1", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V2L1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3191886416929303 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5013485375260267 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38819791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36377992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json b/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json new file mode 100644 index 000000000..9667f8cb5 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L2/1762652579.9065118", + "retrieved_timestamp": "1762652579.906513", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V2L2", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V2L2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8020640788662969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5202843665402132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39746875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38838098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json b/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json new file mode 100644 index 000000000..51791f21c --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V3/1762652579.906709", + "retrieved_timestamp": "1762652579.90671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V3", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7886751596874072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265064133535374 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16767371601208458 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4016875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38040226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json b/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json new file mode 100644 index 000000000..a7157686f --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V30/1762652579.907134", + "retrieved_timestamp": "1762652579.907138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V30", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V30" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7435626360279614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5243248855841048 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332327 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4029270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3943650265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json b/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json new file mode 100644 index 000000000..fe4ad996b --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V4/1762652579.9075332", + "retrieved_timestamp": "1762652579.907535", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V4", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7807317916461656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5245974297200655 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37882313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json b/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json new file mode 100644 index 000000000..64a38cbdc --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V6/1762652579.9077919", + "retrieved_timestamp": "1762652579.9077928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V6", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7837792612490415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5239561762634447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4068020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37591422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json b/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json new file mode 100644 index 000000000..bdefea2a3 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V7/1762652579.908076", + "retrieved_timestamp": "1762652579.908077", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V7", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7786085364610345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070394117180643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41616666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json b/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json new file mode 100644 index 000000000..64a5ee087 --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V8/1762652579.908298", + "retrieved_timestamp": "1762652579.908299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V8", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7913979352562313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064510419864701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json b/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json new file mode 100644 index 000000000..d3c8d112e --- /dev/null +++ b/data/hfopenllm_v2/T145/ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V9/1762652579.908509", + "retrieved_timestamp": "1762652579.90851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/ZEUS-8B-V9", + "developer": "T145", + "inference_platform": "unknown", + "id": "T145/ZEUS-8B-V9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5551436854213487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207256346477752 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21374622356495468 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3949270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39012632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json new file mode 100644 index 000000000..7c926c9d3 --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m-hf/1762652579.9096901", + "retrieved_timestamp": "1762652579.9096909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "THUDM/glm-4-9b-chat-1m-hf", + "developer": "THUDM", + "inference_platform": "unknown", + "id": "THUDM/glm-4-9b-chat-1m-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5341106043076814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900953106836365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36888541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18143284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.484 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json new file mode 100644 index 000000000..d5f1bcecc --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m/1762652579.909478", + "retrieved_timestamp": "1762652579.909479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "THUDM/glm-4-9b-chat-1m", + "developer": "THUDM", + "inference_platform": "unknown", + "id": "THUDM/glm-4-9b-chat-1m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41800578218330303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31632313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModel", + "params_billions": 9.484 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json new file mode 100644 index 000000000..6848dd5fd --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-hf/1762652579.909895", + "retrieved_timestamp": "1762652579.909896", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "THUDM/glm-4-9b-chat-hf", + "developer": "THUDM", + "inference_platform": "unknown", + "id": "THUDM/glm-4-9b-chat-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6513140688927601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4432308604245425 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35930208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27742686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.4 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json b/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json new file mode 100644 index 000000000..6c58e016b --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat/1762652579.909267", + "retrieved_timestamp": "1762652579.909267", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "THUDM/glm-4-9b-chat", + "developer": "THUDM", + "inference_platform": "unknown", + "id": "THUDM/glm-4-9b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47363884291035735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3994270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.316655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModelM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json b/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json new file mode 100644 index 000000000..787462be2 --- /dev/null +++ b/data/hfopenllm_v2/THUDM/glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b/1762652579.9090161", + "retrieved_timestamp": "1762652579.9090161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "THUDM/glm-4-9b", + "developer": "THUDM", + "inference_platform": "unknown", + "id": "THUDM/glm-4-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1426082793654171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5528368141665274 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4385833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "ChatGLMModelM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json b/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json new file mode 100644 index 000000000..cfa3bb515 --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCodeRM-7B/1762652579.9101062", + "retrieved_timestamp": "1762652579.910107", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/AceCodeRM-7B", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "id": "TIGER-Lab/AceCodeRM-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5854931581536988 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773230085351336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3466767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41920833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3361037234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalRM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json b/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json new file mode 100644 index 000000000..4bb4ad4ad --- /dev/null +++ b/data/hfopenllm_v2/TIGER-Lab/MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_MAmmoTH2-7B-Plus/1762652579.9110248", + "retrieved_timestamp": "1762652579.911026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/MAmmoTH2-7B-Plus", + "developer": "TIGER-Lab", + "inference_platform": "unknown", + "id": "TIGER-Lab/MAmmoTH2-7B-Plus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574664113441224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42346949888019064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41235416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30169547872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json b/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json new file mode 100644 index 000000000..676d381bb --- /dev/null +++ b/data/hfopenllm_v2/TeeZee/DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TeeZee_DoubleBagel-57B-v1.0/1762652579.9121659", + "retrieved_timestamp": "1762652579.9121659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TeeZee/DoubleBagel-57B-v1.0", + "developer": "TeeZee", + "inference_platform": "unknown", + "id": "TeeZee/DoubleBagel-57B-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23363342597640924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.325078559362514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43148958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14777260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 56.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json new file mode 100644 index 000000000..c073d9459 --- /dev/null +++ b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/1762652579.912417", + "retrieved_timestamp": "1762652579.912417", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", + "developer": "Telugu-LLM-Labs", + "inference_platform": "unknown", + "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21030310686755588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3240881373468133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3899375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12790890957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json new file mode 100644 index 000000000..c44ef7378 --- /dev/null +++ b/data/hfopenllm_v2/Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/1762652579.912673", + "retrieved_timestamp": "1762652579.912673", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", + "developer": "Telugu-LLM-Labs", + "inference_platform": "unknown", + "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32368449048524583 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40229948924733394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40832291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23503989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json new file mode 100644 index 000000000..88ca70a09 --- /dev/null +++ b/data/hfopenllm_v2/TencentARC/LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B-Instruct/1762652579.9131231", + "retrieved_timestamp": "1762652579.913124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TencentARC/LLaMA-Pro-8B-Instruct", + "developer": "TencentARC", + "inference_platform": "unknown", + "id": "TencentARC/LLaMA-Pro-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486063644463357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4224205282459997 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41902083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19456449468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.357 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json b/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json new file mode 100644 index 000000000..315c7016f --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Cydonia-22B-v1.2/1762652579.9138188", + "retrieved_timestamp": "1762652579.9138198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Cydonia-22B-v1.2", + "developer": "TheDrummer", + "inference_platform": "unknown", + "id": "TheDrummer/Cydonia-22B-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5635114828654637 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.580856074392761 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40217708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4140625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json b/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json new file mode 100644 index 000000000..6de98210c --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Ministrations-8B-v1/1762652579.9148722", + "retrieved_timestamp": "1762652579.9148731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Ministrations-8B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "id": "TheDrummer/Ministrations-8B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28219346888478125 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48766312602251366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18429003021148035 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36436170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json b/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json new file mode 100644 index 000000000..2da455ecd --- /dev/null +++ b/data/hfopenllm_v2/TheDrummer/Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Rocinante-12B-v1/1762652579.915099", + "retrieved_timestamp": "1762652579.9150999", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Rocinante-12B-v1", + "developer": "TheDrummer", + "inference_platform": "unknown", + "id": "TheDrummer/Rocinante-12B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6076499244227538 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065452085797449 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40171874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34773936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json b/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json new file mode 100644 index 000000000..adb02b5d5 --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Daughter-of-Rhodia-12B/1762652579.91594", + "retrieved_timestamp": "1762652579.9159412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrunkenSnail/Daughter-of-Rhodia-12B", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6903815210308648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5179174184876773 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43477083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3641123670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json b/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json new file mode 100644 index 000000000..5b0e8d22e --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Mother-of-Rhodia-12B/1762652579.9161909", + "retrieved_timestamp": "1762652579.9161909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrunkenSnail/Mother-of-Rhodia-12B", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "id": "TheDrunkenSnail/Mother-of-Rhodia-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6504895898438365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49479138664574934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41241666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35513630319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json b/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json new file mode 100644 index 000000000..dc8c8c2a1 --- /dev/null +++ b/data/hfopenllm_v2/TheDrunkenSnail/Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Son-of-Rhodia/1762652579.916397", + "retrieved_timestamp": "1762652579.916397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrunkenSnail/Son-of-Rhodia", + "developer": "TheDrunkenSnail", + "inference_platform": "unknown", + "id": "TheDrunkenSnail/Son-of-Rhodia" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7046447869430887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097327647725524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4202916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3607878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json b/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json new file mode 100644 index 000000000..e25147ffa --- /dev/null +++ b/data/hfopenllm_v2/TheHierophant/Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheHierophant_Underground-Cognitive-V0.3-test/1762652579.916598", + "retrieved_timestamp": "1762652579.916598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheHierophant/Underground-Cognitive-V0.3-test", + "developer": "TheHierophant", + "inference_platform": "unknown", + "id": "TheHierophant/Underground-Cognitive-V0.3-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4808297539417634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290131900998047 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43511458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.331781914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json b/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json new file mode 100644 index 000000000..681692c2e --- /dev/null +++ b/data/hfopenllm_v2/TheTsar1209/nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_nemo-carpmuscle-v0.1/1762652579.9168499", + "retrieved_timestamp": "1762652579.916851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/nemo-carpmuscle-v0.1", + "developer": "TheTsar1209", + "inference_platform": "unknown", + "id": "TheTsar1209/nemo-carpmuscle-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2275639746982451 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5083529697101391 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3405917553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json b/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json new file mode 100644 index 000000000..efccccc27 --- /dev/null +++ b/data/hfopenllm_v2/Tijmen2/cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tijmen2_cosmosage-v3/1762652579.918411", + "retrieved_timestamp": "1762652579.918412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tijmen2/cosmosage-v3", + "developer": "Tijmen2", + "inference_platform": "unknown", + "id": "Tijmen2/cosmosage-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44823180272787316 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4550637900339029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4198854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24858710106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json new file mode 100644 index 000000000..4acc4eebc --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.1/1762652579.918663", + "retrieved_timestamp": "1762652579.918664", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", + "developer": "TinyLlama", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1478543597654224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30835294748680114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22902684563758388 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35923958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10979055851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json new file mode 100644 index 000000000..4c4ebe171 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.5/1762652579.918914", + "retrieved_timestamp": "1762652579.918914", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", + "developer": "TinyLlama", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1633665341294432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3105046915935697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36612500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10962433510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json new file mode 100644 index 000000000..64b9a7ceb --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.6/1762652579.919127", + "retrieved_timestamp": "1762652579.919127", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "developer": "TinyLlama", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15742119797692344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3066976656166826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11486037234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json new file mode 100644 index 000000000..a09d007f0 --- /dev/null +++ b/data/hfopenllm_v2/TinyLlama/TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v1.0/1762652579.9193401", + "retrieved_timestamp": "1762652579.919341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "developer": "TinyLlama", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0595763684800773 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3103562867491015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35152083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11012300531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json b/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json new file mode 100644 index 000000000..f43508f36 --- /dev/null +++ b/data/hfopenllm_v2/ToastyPigeon/Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ToastyPigeon_Sto-vo-kor-12B/1762652579.920128", + "retrieved_timestamp": "1762652579.920129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ToastyPigeon/Sto-vo-kor-12B", + "developer": "ToastyPigeon", + "inference_platform": "unknown", + "id": "ToastyPigeon/Sto-vo-kor-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5501225636865739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064617128925814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json b/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json new file mode 100644 index 000000000..922ffdcaa --- /dev/null +++ b/data/hfopenllm_v2/Trappu/Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Trappu_Magnum-Picaro-0.7-v2-12b/1762652579.920383", + "retrieved_timestamp": "1762652579.920383", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trappu/Magnum-Picaro-0.7-v2-12b", + "developer": "Trappu", + "inference_platform": "unknown", + "id": "Trappu/Magnum-Picaro-0.7-v2-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.300278815764394 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5506661918828847 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47271875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35804521276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json b/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json new file mode 100644 index 000000000..42fd4f97a --- /dev/null +++ b/data/hfopenllm_v2/Trappu/Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Trappu_Nemo-Picaro-12B/1762652579.92064", + "retrieved_timestamp": "1762652579.92064", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Trappu/Nemo-Picaro-12B", + "developer": "Trappu", + "inference_platform": "unknown", + "id": "Trappu/Nemo-Picaro-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2577139766929525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489586125997546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47259375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36045545212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json b/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json new file mode 100644 index 000000000..b5c065e2f --- /dev/null +++ b/data/hfopenllm_v2/Tremontaine/L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tremontaine_L3-12B-Lunaris-v1/1762652579.920848", + "retrieved_timestamp": "1762652579.920848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tremontaine/L3-12B-Lunaris-v1", + "developer": "Tremontaine", + "inference_platform": "unknown", + "id": "Tremontaine/L3-12B-Lunaris-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6909311737301471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5230217237244009 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3673645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3774933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json b/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json new file mode 100644 index 000000000..46e556f25 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Annunaki-12b/1762652579.921084", + "retrieved_timestamp": "1762652579.921084", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Annunaki-12b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Annunaki-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872070550583563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5498969437971782 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44087499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3720910904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json b/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json new file mode 100644 index 000000000..5a16fb39d --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_BigTalker-Lite-8B/1762652579.92133", + "retrieved_timestamp": "1762652579.921331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/BigTalker-Lite-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/BigTalker-Lite-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3689222374411007 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5308138241234059 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42084375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34308510638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json new file mode 100644 index 000000000..2691536ab --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V2.0/1762652579.921529", + "retrieved_timestamp": "1762652579.92153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Chatty-Harry_V2.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Chatty-Harry_V2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3325520729442324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5318928049062546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40782291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json new file mode 100644 index 000000000..f39117824 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V3.0/1762652579.9217439", + "retrieved_timestamp": "1762652579.9217439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Chatty-Harry_V3.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Chatty-Harry_V3.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36749823800848413 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5526193453608234 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44084375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37017952127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json b/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json new file mode 100644 index 000000000..987688982 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Chronos-Prism_V1.0/1762652579.921948", + "retrieved_timestamp": "1762652579.921948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Chronos-Prism_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Chronos-Prism_V1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3259329689667859 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5554188807010064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36727061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json new file mode 100644 index 000000000..99f4ed335 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/1762652579.9225988", + "retrieved_timestamp": "1762652579.9226", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17508211545366295 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2643276743386568 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2105704697986577 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11727061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 10.366 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json new file mode 100644 index 000000000..58fd8b1a1 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/1762652579.9228039", + "retrieved_timestamp": "1762652579.922805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515042309959796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5783379428926061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5551359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5566875000000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4601063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json new file mode 100644 index 000000000..47f835353 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-7B-RP/1762652579.923009", + "retrieved_timestamp": "1762652579.923009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34454248061809334 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43834886662348205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40302083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2890625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json b/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json new file mode 100644 index 000000000..c0b531106 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Dark-Chivalry_V1.0/1762652579.923868", + "retrieved_timestamp": "1762652579.923869", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Dark-Chivalry_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Dark-Chivalry_V1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325700253106203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4974207759950637 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4181770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441489361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json new file mode 100644 index 000000000..47a5d180b --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B/1762652579.9240808", + "retrieved_timestamp": "1762652579.9240808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Distilled-DarkPlanet-Allades-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Distilled-DarkPlanet-Allades-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460163477351206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4633948672868899 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29014295212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json new file mode 100644 index 000000000..4d82fd62f --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/1762652579.924282", + "retrieved_timestamp": "1762652579.924282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3891807071902552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5041556910813355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.340093085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json b/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json new file mode 100644 index 000000000..45895340c --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-Whiskey-8b/1762652579.924494", + "retrieved_timestamp": "1762652579.9244952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Distilled-Whiskey-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Distilled-Whiskey-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34476743928332376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5027820189600739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41721874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3366855053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json b/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json new file mode 100644 index 000000000..5ce5d658b --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes3-L3.1-DirtyHarry-8B/1762652579.925645", + "retrieved_timestamp": "1762652579.925645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32423414318452815 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5066388671914118 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4068958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338597074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json new file mode 100644 index 000000000..d464b5604 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B/1762652579.925863", + "retrieved_timestamp": "1762652579.925863", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Herodotos-14B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Herodotos-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4667415790103592 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6435044367110887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4795416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290059840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json b/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json new file mode 100644 index 000000000..ad2344f28 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B_V0.1/1762652579.9261289", + "retrieved_timestamp": "1762652579.926136", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Herodotos-14B_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Herodotos-14B_V0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1878715142488597 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30172239497895226 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22399328859060402 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3683854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11643949468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json new file mode 100644 index 000000000..fe3d62274 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink/1762652579.926589", + "retrieved_timestamp": "1762652579.92659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/L3.1-8B-Dusky-Ink", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/L3.1-8B-Dusky-Ink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4529780981130068 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097902234872148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json new file mode 100644 index 000000000..834e51d96 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/1762652579.926839", + "retrieved_timestamp": "1762652579.92684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19848779017451473 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43372778578458115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.320561835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json new file mode 100644 index 000000000..5699be2d6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesBlackroot/1762652579.927087", + "retrieved_timestamp": "1762652579.927088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/LThreePointOne-8B-HermesBlackroot", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/LThreePointOne-8B-HermesBlackroot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17920340252751588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4998333246909241 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3585520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32845744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json new file mode 100644 index 000000000..8d6579eb9 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesInk/1762652579.927316", + "retrieved_timestamp": "1762652579.927316", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/LThreePointOne-8B-HermesInk", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/LThreePointOne-8B-HermesInk" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031192790684273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5222765555856439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4129375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34674202127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json new file mode 100644 index 000000000..7490e4074 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b/1762652579.9280179", + "retrieved_timestamp": "1762652579.9280179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-1.5b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2694295580171722 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4025709779119226 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3655 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.269780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json b/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json new file mode 100644 index 000000000..48daaee8e --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b_V0.2/1762652579.928302", + "retrieved_timestamp": "1762652579.928303", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-1.5b_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-1.5b_V0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3083474071020448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3989042137094949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3960104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29105718085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json b/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json new file mode 100644 index 000000000..b3a7843e6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-10b/1762652579.928542", + "retrieved_timestamp": "1762652579.928543", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-10b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-10b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1878715142488597 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4462036157096501 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36270833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23179853723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 10.067 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json b/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json new file mode 100644 index 000000000..ed936019c --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b-V0.1/1762652579.92906", + "retrieved_timestamp": "1762652579.9290612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-14b-V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-14b-V0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0861292481726264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6089792638423274 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36577181208053694 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47002083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5118018617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json b/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json new file mode 100644 index 000000000..81a09e96c --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b/1762652579.928819", + "retrieved_timestamp": "1762652579.928819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-14b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3467898509288687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6300829439447851 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.476625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193650265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json b/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json new file mode 100644 index 000000000..fc72c1ba6 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-7b/1762652579.929375", + "retrieved_timestamp": "1762652579.929377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-7b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724196243744376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5498400501314606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44439827127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json b/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json new file mode 100644 index 000000000..9f68b562f --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-8b/1762652579.9296892", + "retrieved_timestamp": "1762652579.9296901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Minerva-8b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Minerva-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17208451353519771 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46686093526780637 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4272916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30892619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json new file mode 100644 index 000000000..49cf2abcd --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.1/1762652579.9304042", + "retrieved_timestamp": "1762652579.9304051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Pans_Gutenbergum_V0.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Pans_Gutenbergum_V0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.309696050922663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5541091780465247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3696808510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json new file mode 100644 index 000000000..495dc3a49 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.2/1762652579.93062", + "retrieved_timestamp": "1762652579.930621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Pans_Gutenbergum_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Pans_Gutenbergum_V0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3215113676157041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.55257930562769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46732291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3585438829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json b/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json new file mode 100644 index 000000000..1de2f54d8 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Pantheon_ChatWaifu_V0.2/1762652579.930828", + "retrieved_timestamp": "1762652579.930829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Pantheon_ChatWaifu_V0.2", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Pantheon_ChatWaifu_V0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2682803849341968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5531574435698693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47551041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34424867021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json b/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json new file mode 100644 index 000000000..1ca25d444 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1762652579.93199", + "retrieved_timestamp": "1762652579.931991", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5986327389105351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6338808682301471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4795416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074800531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json b/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json new file mode 100644 index 000000000..eea53246e --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-AthensCOT/1762652579.9322", + "retrieved_timestamp": "1762652579.932201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-AthensCOT", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-AthensCOT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45727447616767947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5541692533534606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4578333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4379155585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json b/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json new file mode 100644 index 000000000..dc60e50d3 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-CodeR1-3B/1762652579.932402", + "retrieved_timestamp": "1762652579.9324028", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-CodeR1-3B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-CodeR1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35875587884590665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4660844324968853 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43154166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978723404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json b/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json new file mode 100644 index 000000000..6c3614d4c --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EVACOT-7b/1762652579.9326148", + "retrieved_timestamp": "1762652579.932616", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-EVACOT-7b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-EVACOT-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5784241368457914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5505524946794311 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4498645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43309507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json b/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json new file mode 100644 index 000000000..88a46147c --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EvaHumane-RP/1762652579.932837", + "retrieved_timestamp": "1762652579.932837", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-EvaHumane-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-EvaHumane-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3676234613048932 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5328196297646768 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42763541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4412400265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json b/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json new file mode 100644 index 000000000..1790025ec --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Humane-RP/1762652579.933056", + "retrieved_timestamp": "1762652579.933057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-Humane-RP", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-Humane-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4411627814199657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5649289292164736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3391238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json b/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json new file mode 100644 index 000000000..b384db78e --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Instruct-1M_Harmony/1762652579.933266", + "retrieved_timestamp": "1762652579.9332669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-Instruct-1M_Harmony", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-Instruct-1M_Harmony" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6038034636985421 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5373243549676157 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46878125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43658577127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json new file mode 100644 index 000000000..1dc59aa09 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-3B/1762652579.933473", + "retrieved_timestamp": "1762652579.933474", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-R1-3B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-R1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4213542290012722 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48124304786769817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43197916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38131648936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json b/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json new file mode 100644 index 000000000..317443a11 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-7B/1762652579.933674", + "retrieved_timestamp": "1762652579.933675", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Q2.5-R1-7B", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Q2.5-R1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1346150436397647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30065625818799685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3607291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1180186170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json b/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json new file mode 100644 index 000000000..9eda526cd --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Robo-Gutenberg_V1.0/1762652579.9338748", + "retrieved_timestamp": "1762652579.933876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Robo-Gutenberg_V1.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Robo-Gutenberg_V1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6007559940956662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.653716560941194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4561933534743202 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47436458333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5391456117021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json new file mode 100644 index 000000000..11ce2471d --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.0/1762652579.9340868", + "retrieved_timestamp": "1762652579.9340868", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Rocinante-Prism_V2.0", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Rocinante-Prism_V2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2616103051015749 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361246041982355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640292553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json new file mode 100644 index 000000000..69ffcd9e4 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.1/1762652579.934289", + "retrieved_timestamp": "1762652579.93429", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Rocinante-Prism_V2.1", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Rocinante-Prism_V2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25584005992987496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5332676401860506 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44896874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3651097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json b/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json new file mode 100644 index 000000000..865c5a869 --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Rombos-Novasky-7B_V1c/1762652579.934721", + "retrieved_timestamp": "1762652579.934722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Rombos-Novasky-7B_V1c", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Rombos-Novasky-7B_V1c" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40801517750679306 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4349247829177707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44645833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27376994680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json b/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json new file mode 100644 index 000000000..5bda0314a --- /dev/null +++ b/data/hfopenllm_v2/Triangle104/Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Set-70b/1762652579.934931", + "retrieved_timestamp": "1762652579.934931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Set-70b", + "developer": "Triangle104", + "inference_platform": "unknown", + "id": "Triangle104/Set-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7642954028643998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.70142939330013 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640483383685801 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4463087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46956250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5442154255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json new file mode 100644 index 000000000..349433bf1 --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5-7B-Instruct/1762652579.935141", + "retrieved_timestamp": "1762652579.9351418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-th/Tsunami-0.5-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "id": "Tsunami-th/Tsunami-0.5-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7400153814102137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.552369427738073 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42571875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json new file mode 100644 index 000000000..ba92c73ae --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5x-7B-Instruct/1762652579.9353971", + "retrieved_timestamp": "1762652579.9353979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-th/Tsunami-0.5x-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.709915247099917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5592865858560252 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206948640483384 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46667708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44581117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json new file mode 100644 index 000000000..6bd0f1fad --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-14B-Instruct/1762652579.935597", + "retrieved_timestamp": "1762652579.935597", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-th/Tsunami-1.0-14B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "id": "Tsunami-th/Tsunami-1.0-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7829049145157072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6438763263011559 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45845921450151056 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44593750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5248503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json new file mode 100644 index 000000000..8ee9ddac4 --- /dev/null +++ b/data/hfopenllm_v2/Tsunami-th/Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-7B-Instruct/1762652579.9358132", + "retrieved_timestamp": "1762652579.9358132", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tsunami-th/Tsunami-1.0-7B-Instruct", + "developer": "Tsunami-th", + "inference_platform": "unknown", + "id": "Tsunami-th/Tsunami-1.0-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.730872972601586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.549071195618326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4335347432024169 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44928125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424035904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json new file mode 100644 index 000000000..65059a138 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/1762652579.9367309", + "retrieved_timestamp": "1762652579.9367318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7298988904994304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057890691082708 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json new file mode 100644 index 000000000..6a10c1cf0 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/1762652579.93697", + "retrieved_timestamp": "1762652579.936971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6988745417713889 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088696278852957 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36918218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json new file mode 100644 index 000000000..a16ae4e32 --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937367", + "retrieved_timestamp": "1762652579.9373682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.67029814226253 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5076407742830437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3647291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657746010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json new file mode 100644 index 000000000..595b41baa --- /dev/null +++ b/data/hfopenllm_v2/UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937166", + "retrieved_timestamp": "1762652579.9371672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "developer": "UCLA-AGI", + "inference_platform": "unknown", + "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6834122350917787 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50795799761689 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36606249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644448138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json b/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json new file mode 100644 index 000000000..4e217e707 --- /dev/null +++ b/data/hfopenllm_v2/Undi95/MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Undi95_MG-FinalMix-72B/1762652579.938925", + "retrieved_timestamp": "1762652579.938925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Undi95/MG-FinalMix-72B", + "developer": "Undi95", + "inference_platform": "unknown", + "id": "Undi95/MG-FinalMix-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8013648231137825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6973017446417747 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48227083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.542719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json b/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json new file mode 100644 index 000000000..e54b54f28 --- /dev/null +++ b/data/hfopenllm_v2/V3N0M/Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/V3N0M_Jenna-Tiny-2.0/1762652579.9394162", + "retrieved_timestamp": "1762652579.9394171", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "V3N0M/Jenna-Tiny-2.0", + "developer": "V3N0M", + "inference_platform": "unknown", + "id": "V3N0M/Jenna-Tiny-2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2309361383351729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31479264061817097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33666666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.631 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json new file mode 100644 index 000000000..4f9743b7f --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/1762652579.939689", + "retrieved_timestamp": "1762652579.939689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8044621604010691 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6663247245334951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2280966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43393750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392287234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json new file mode 100644 index 000000000..48507fccb --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/1762652579.93995", + "retrieved_timestamp": "1762652579.9399512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.744536718130117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.494337579362695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42410416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3857214095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json new file mode 100644 index 000000000..c13a18d5f --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/1762652579.940237", + "retrieved_timestamp": "1762652579.940238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8656365111238181 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7006249194404001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693353474320242 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4710833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5334940159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json new file mode 100644 index 000000000..b4825eef4 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/1762652579.9404852", + "retrieved_timestamp": "1762652579.940486", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8017393848322452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5114932190011187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19410876132930513 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3890458776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json new file mode 100644 index 000000000..5bf53c22c --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-1.5b/1762652579.940706", + "retrieved_timestamp": "1762652579.940707", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-1.5b", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24040324117785256 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3703912164863146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21509308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json new file mode 100644 index 000000000..8f4b3ee9f --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-HerO/1762652579.940931", + "retrieved_timestamp": "1762652579.940931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-7b-HerO", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-7b-HerO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.534610389322553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49044349935812964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39238541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30460438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json new file mode 100644 index 000000000..1dbb1a06d --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-LaserChat/1762652579.941142", + "retrieved_timestamp": "1762652579.941143", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-7b-LaserChat", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5987823419637672 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45432707993295685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3304521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json new file mode 100644 index 000000000..62a26c69b --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/1762652579.9418082", + "retrieved_timestamp": "1762652579.941809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5601891869129465 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5277342269858817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42041666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3650265957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json new file mode 100644 index 000000000..bab93b0c7 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/1762652579.942016", + "retrieved_timestamp": "1762652579.942017", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6112969144093228 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214128647611115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33851396276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json new file mode 100644 index 000000000..faedb1cc8 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/1762652579.942544", + "retrieved_timestamp": "1762652579.942544", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49172085621705963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169447300097646 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3965416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31831781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json new file mode 100644 index 000000000..4c867ce64 --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-DPO/1762652579.943197", + "retrieved_timestamp": "1762652579.943197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7411645544931892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6560374350756156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3164652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.51171875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json new file mode 100644 index 000000000..8d1efc2ca --- /dev/null +++ b/data/hfopenllm_v2/VAGOsolutions/SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-SFT/1762652579.94341", + "retrieved_timestamp": "1762652579.9434109", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", + "developer": "VAGOsolutions", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6948529900663573 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6210355880693049 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3285498489425982 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5205285904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json new file mode 100644 index 000000000..da902168e --- /dev/null +++ b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/1762652579.9476302", + "retrieved_timestamp": "1762652579.9476311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", + "developer": "Vikhrmodels", + "inference_platform": "unknown", + "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.643145742186288 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527224269970207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3547207446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json new file mode 100644 index 000000000..a299a4275 --- /dev/null +++ b/data/hfopenllm_v2/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/1762652579.947979", + "retrieved_timestamp": "1762652579.94798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", + "developer": "Vikhrmodels", + "inference_platform": "unknown", + "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5999315150467426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5212309052827618 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1714501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976063829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json new file mode 100644 index 000000000..adbffd722 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-2x34B/1762652579.948213", + "retrieved_timestamp": "1762652579.948214", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Bagel-Hermes-2x34B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "id": "Weyaxi/Bagel-Hermes-2x34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431532777474878 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49166555632285514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45166666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4588597074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json new file mode 100644 index 000000000..3336bed36 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-34B-Slerp/1762652579.948482", + "retrieved_timestamp": "1762652579.948482", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Bagel-Hermes-34B-Slerp", + "developer": "Weyaxi", + "inference_platform": "unknown", + "id": "Weyaxi/Bagel-Hermes-34B-Slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4602720780861448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5921903605860047 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46220833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4703291223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json b/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json new file mode 100644 index 000000000..dbfbe27a1 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v4-7B/1762652579.948704", + "retrieved_timestamp": "1762652579.948705", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Einstein-v4-7B", + "developer": "Weyaxi", + "inference_platform": "unknown", + "id": "Weyaxi/Einstein-v4-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47081299839980145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38494699692741774 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22589760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json b/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json new file mode 100644 index 000000000..123ffa5e9 --- /dev/null +++ b/data/hfopenllm_v2/Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/1762652579.950165", + "retrieved_timestamp": "1762652579.950166", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", + "developer": "Weyaxi", + "inference_platform": "unknown", + "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4573243438520902 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5166357112030591 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31532579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json new file mode 100644 index 000000000..1c4d347f2 --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.0/1762652579.9503958", + "retrieved_timestamp": "1762652579.950397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLMTeam/WizardLM-13B-V1.0", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + "id": "WizardLMTeam/WizardLM-13B-V1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18504900331121424 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29134447696551025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json new file mode 100644 index 000000000..5e1bb1e52 --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.2/1762652579.950676", + "retrieved_timestamp": "1762652579.950676", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLMTeam/WizardLM-13B-V1.2", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + "id": "WizardLMTeam/WizardLM-13B-V1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3392465325336773 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44619994364600474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43784375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25191156914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json b/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json new file mode 100644 index 000000000..428262959 --- /dev/null +++ b/data/hfopenllm_v2/WizardLMTeam/WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-70B-V1.0/1762652579.950908", + "retrieved_timestamp": "1762652579.950909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "WizardLMTeam/WizardLM-70B-V1.0", + "developer": "WizardLMTeam", + "inference_platform": "unknown", + "id": "WizardLMTeam/WizardLM-70B-V1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49514288753839814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5590366047184262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43911458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34466422872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json b/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json new file mode 100644 index 000000000..740782842 --- /dev/null +++ b/data/hfopenllm_v2/Wladastic/Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Wladastic_Mini-Think-Base-1B/1762652579.951128", + "retrieved_timestamp": "1762652579.9511292", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Wladastic/Mini-Think-Base-1B", + "developer": "Wladastic", + "inference_platform": "unknown", + "id": "Wladastic/Mini-Think-Base-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5588405430923283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35741728048349203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32748958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17719414893617022 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json b/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json new file mode 100644 index 000000000..29414f527 --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xclbr7_Arcanum-12b/1762652579.9514", + "retrieved_timestamp": "1762652579.951401", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xclbr7/Arcanum-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "id": "Xclbr7/Arcanum-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2906864896253053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265359354118465 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41703124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3586269946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json b/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json new file mode 100644 index 000000000..1a8023a2c --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xclbr7_Hyena-12b/1762652579.9516642", + "retrieved_timestamp": "1762652579.951665", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xclbr7/Hyena-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "id": "Xclbr7/Hyena-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3404455733010634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5457182415468321 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39842708333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3439162234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json b/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json new file mode 100644 index 000000000..f39324733 --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-12b/1762652579.951879", + "retrieved_timestamp": "1762652579.95188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xclbr7/caliburn-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "id": "Xclbr7/caliburn-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35763108551975425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5518630300231809 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4291875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36751994680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json b/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json new file mode 100644 index 000000000..600bec3b7 --- /dev/null +++ b/data/hfopenllm_v2/Xclbr7/caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-v2-12b/1762652579.952102", + "retrieved_timestamp": "1762652579.952102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xclbr7/caliburn-v2-12b", + "developer": "Xclbr7", + "inference_platform": "unknown", + "id": "Xclbr7/caliburn-v2-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2966816934622358 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141426125097639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43703125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37840757978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json b/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json new file mode 100644 index 000000000..54d8d917d --- /dev/null +++ b/data/hfopenllm_v2/Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/1762652579.9550028", + "retrieved_timestamp": "1762652579.9550028", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", + "developer": "Xiaojian9992024", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46194541594081484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4389528940684813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36673958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29878656914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json b/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json new file mode 100644 index 000000000..5dc104502 --- /dev/null +++ b/data/hfopenllm_v2/Yash21/TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Yash21_TinyYi-7B-Test/1762652579.960211", + "retrieved_timestamp": "1762652579.960212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yash21/TinyYi-7B-Test", + "developer": "Yash21", + "inference_platform": "unknown", + "id": "Yash21/TinyYi-7B-Test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18564852369490728 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29098007801214715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3364479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10912566489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json b/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json new file mode 100644 index 000000000..8a862065f --- /dev/null +++ b/data/hfopenllm_v2/Youlln/1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_1PARAMMYL-8B-ModelStock/1762652579.9604638", + "retrieved_timestamp": "1762652579.960465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/1PARAMMYL-8B-ModelStock", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/1PARAMMYL-8B-ModelStock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371336941537344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215839663555125 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000166223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json b/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json new file mode 100644 index 000000000..815b64a70 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/1762652579.9607239", + "retrieved_timestamp": "1762652579.960725", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28259351853083153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46647504291710673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47560416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3169880319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json new file mode 100644 index 000000000..613e2b356 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-12B/1762652579.96142", + "retrieved_timestamp": "1762652579.96142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-MIRAGE-1-12B", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-MIRAGE-1-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20698081091503875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30107140221306034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3219375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11095412234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.21 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json new file mode 100644 index 000000000..90178b7c6 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-15B/1762652579.961622", + "retrieved_timestamp": "1762652579.961622", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-MIRAGE-1-15B", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-MIRAGE-1-15B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20698081091503875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30107140221306034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3219375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11095412234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.21 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json new file mode 100644 index 000000000..2d73d146d --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/1762652579.962029", + "retrieved_timestamp": "1762652579.962029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15334977858748122 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3041148294962408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1644780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json new file mode 100644 index 000000000..cc39e6fb5 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3/1762652579.9618208", + "retrieved_timestamp": "1762652579.9618208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16419101317836673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30931341134548046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11610704787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json new file mode 100644 index 000000000..648059681 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/1762652579.9622452", + "retrieved_timestamp": "1762652579.962246", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1137570535069172 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3038362724383693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13214760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json new file mode 100644 index 000000000..b59f85dba --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/1762652579.962454", + "retrieved_timestamp": "1762652579.962455", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1611934112599015 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2934774313772131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10945811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json new file mode 100644 index 000000000..9a2f9bc04 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/1762652579.9626722", + "retrieved_timestamp": "1762652579.9626722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16701352411601217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29383772587210827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.354125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10871010638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json new file mode 100644 index 000000000..08dadfd6c --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/1762652579.962892", + "retrieved_timestamp": "1762652579.962893", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32510848991786234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4208506248736219 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4265833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2935505319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json new file mode 100644 index 000000000..0ead19a73 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/1762652579.963118", + "retrieved_timestamp": "1762652579.963118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32510848991786234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4208506248736219 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4265833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2935505319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json new file mode 100644 index 000000000..b2eb1604e --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/1762652579.963329", + "retrieved_timestamp": "1762652579.963329", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2509696494190969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37697272812325017 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3744895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2131815159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json new file mode 100644 index 000000000..4c460cf0b --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5-FT/1762652579.963541", + "retrieved_timestamp": "1762652579.963541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL0.5-FT", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL0.5-FT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18507338306803725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31320911187036277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14768949468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json new file mode 100644 index 000000000..f12f1ead1 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5B-Youri/1762652579.963748", + "retrieved_timestamp": "1762652579.9637492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL0.5B-Youri", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL0.5B-Youri" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1446317991817267 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28173574256265815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10954122340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json new file mode 100644 index 000000000..60d65c97d --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL1B-FT-V1/1762652579.963949", + "retrieved_timestamp": "1762652579.9639502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-PRYMMAL1B-FT-V1", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-PRYMMAL1B-FT-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2143745262569981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4032647427840684 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2742686170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json b/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json new file mode 100644 index 000000000..a650a0c2a --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE-Qwen0.5B-FT-V2/1762652579.9641678", + "retrieved_timestamp": "1762652579.964169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE-Qwen0.5B-FT-V2", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE-Qwen0.5B-FT-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25259311958935626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.328970813623839 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30628125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16655585106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json b/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json new file mode 100644 index 000000000..8f5733421 --- /dev/null +++ b/data/hfopenllm_v2/Youlln/ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/1762652579.964375", + "retrieved_timestamp": "1762652579.964375", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", + "developer": "Youlln", + "inference_platform": "unknown", + "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2561403742071038 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33056720460862643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json b/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json new file mode 100644 index 000000000..19a58f89e --- /dev/null +++ b/data/hfopenllm_v2/Yuma42/KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Yuma42_KangalKhan-RawRuby-7B/1762652579.9648829", + "retrieved_timestamp": "1762652579.964884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yuma42/KangalKhan-RawRuby-7B", + "developer": "Yuma42", + "inference_platform": "unknown", + "id": "Yuma42/KangalKhan-RawRuby-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547674614467391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47547278683676025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39495833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30227726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json b/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json new file mode 100644 index 000000000..fd0ddf8c0 --- /dev/null +++ b/data/hfopenllm_v2/Z1-Coder/Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Z1-Coder_Z1-Coder-7B/1762652579.9655669", + "retrieved_timestamp": "1762652579.965568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Z1-Coder/Z1-Coder-7B", + "developer": "Z1-Coder", + "inference_platform": "unknown", + "id": "Z1-Coder/Z1-Coder-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3215113676157041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48418251218099567 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37591422872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json new file mode 100644 index 000000000..145bb23cd --- /dev/null +++ b/data/hfopenllm_v2/ZHLiu627/zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-dpo-avg/1762652579.9658082", + "retrieved_timestamp": "1762652579.9658089", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZHLiu627/zephyr-7b-gemma-dpo-avg", + "developer": "ZHLiu627", + "inference_platform": "unknown", + "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30899679517014855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41488227982365095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4107083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28507313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json b/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json new file mode 100644 index 000000000..97f2127f5 --- /dev/null +++ b/data/hfopenllm_v2/ZeroXClem/L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_L3-Aspire-Heart-Matrix-8B/1762652579.96632", + "retrieved_timestamp": "1762652579.966321", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", + "developer": "ZeroXClem", + "inference_platform": "unknown", + "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48335305877294465 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5384211938486898 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3784906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json b/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json new file mode 100644 index 000000000..e57d28d67 --- /dev/null +++ b/data/hfopenllm_v2/ZeusLabs/L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeusLabs_L3-Aethora-15B-V2/1762652579.968798", + "retrieved_timestamp": "1762652579.9687989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeusLabs/L3-Aethora-15B-V2", + "developer": "ZeusLabs", + "inference_platform": "unknown", + "id": "ZeusLabs/L3-Aethora-15B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7208063493752133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5010910465463698 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3870833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3499833776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.01 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json b/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json new file mode 100644 index 000000000..16acba86f --- /dev/null +++ b/data/hfopenllm_v2/ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/1762652579.9690418", + "retrieved_timestamp": "1762652579.969043", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", + "developer": "ZhangShenao", + "inference_platform": "unknown", + "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6902817856620433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5046089390770511 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38451041666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783244680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json b/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json new file mode 100644 index 000000000..eb9544f71 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Dracarys-72B-Instruct/1762652579.969532", + "retrieved_timestamp": "1762652579.969532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Dracarys-72B-Instruct", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Dracarys-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7855778224001206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6944066392084981 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4558229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5456283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json b/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json new file mode 100644 index 000000000..d6c386a52 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-34B-v0.1/1762652579.970392", + "retrieved_timestamp": "1762652579.9703932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Smaug-34B-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Smaug-34B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5015625207782018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5357785983493821 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4542885638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json b/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json new file mode 100644 index 000000000..e64036cc2 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-72B-v0.1/1762652579.970887", + "retrieved_timestamp": "1762652579.9708889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Smaug-72B-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Smaug-72B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5167001334237601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5995632330786429 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4473229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4623503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json b/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json new file mode 100644 index 000000000..b0f2994fb --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Llama-3-70B-Instruct-32K/1762652579.971162", + "retrieved_timestamp": "1762652579.9711628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Smaug-Llama-3-70B-Instruct-32K", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7761107195574409 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6493108088828602 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27492447129909364 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47647938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json b/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json new file mode 100644 index 000000000..9ab3a95b6 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Mixtral-v0.1/1762652579.971408", + "retrieved_timestamp": "1762652579.9714088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Smaug-Mixtral-v0.1", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Smaug-Mixtral-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5554428915278129 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5162245602454115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4298125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351894946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json b/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json new file mode 100644 index 000000000..d158e05e8 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Qwen2-72B-Instruct/1762652579.9716392", + "retrieved_timestamp": "1762652579.97164", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Smaug-Qwen2-72B-Instruct", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/Smaug-Qwen2-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825303527972447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6909789934583822 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4131419939577039 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44007291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519032579787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json b/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json new file mode 100644 index 000000000..c719c1240 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_bigstral-12b-32k/1762652579.971883", + "retrieved_timestamp": "1762652579.971884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/bigstral-12b-32k", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/bigstral-12b-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41938057686937324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4700122314782882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45597916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26412898936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.476 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json b/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json new file mode 100644 index 000000000..9469eaf24 --- /dev/null +++ b/data/hfopenllm_v2/abacusai/bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_bigyi-15b/1762652579.972117", + "retrieved_timestamp": "1762652579.972117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/bigyi-15b", + "developer": "abacusai", + "inference_platform": "unknown", + "id": "abacusai/bigyi-15b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20940327220663396 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345298820215116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30028257978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.058 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json new file mode 100644 index 000000000..1e269ffdb --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972783", + "retrieved_timestamp": "1762652579.972784", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-0tmgq-5tpbg", + "developer": "abhishek", + "inference_platform": "unknown", + "id": "abhishek/autotrain-0tmgq-5tpbg" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19516549422199764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3127326480314375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35837499999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11436170212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json new file mode 100644 index 000000000..5b9d4bd84 --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972393", + "retrieved_timestamp": "1762652579.972395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-0tmgq-5tpbg", + "developer": "abhishek", + "inference_platform": "unknown", + "id": "abhishek/autotrain-0tmgq-5tpbg" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19571514692127998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3134513987945074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36504166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11510970744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json b/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json new file mode 100644 index 000000000..99960a026 --- /dev/null +++ b/data/hfopenllm_v2/abhishek/autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-vr4a1-e5mms/1762652579.973708", + "retrieved_timestamp": "1762652579.973709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-vr4a1-e5mms", + "developer": "abhishek", + "inference_platform": "unknown", + "id": "abhishek/autotrain-vr4a1-e5mms" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21422492320376602 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5000624442873264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.389125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36668882978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json b/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json new file mode 100644 index 000000000..5a30331a4 --- /dev/null +++ b/data/hfopenllm_v2/adamo1139/Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/adamo1139_Yi-34B-200K-AEZAKMI-v2/1762652579.974368", + "retrieved_timestamp": "1762652579.974369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "adamo1139/Yi-34B-200K-AEZAKMI-v2", + "developer": "adamo1139", + "inference_platform": "unknown", + "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4555257827010111 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383819237015192 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38860416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4512965425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json b/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json new file mode 100644 index 000000000..5c8f47cfe --- /dev/null +++ b/data/hfopenllm_v2/aevalone/distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aevalone_distill_qw_test/1762652579.975426", + "retrieved_timestamp": "1762652579.9754272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aevalone/distill_qw_test", + "developer": "aevalone", + "inference_platform": "unknown", + "id": "aevalone/distill_qw_test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.740889728143548 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5245748734435777 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4780966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38596874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4091589095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json b/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json new file mode 100644 index 000000000..7a1a41bed --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/1762652579.976028", + "retrieved_timestamp": "1762652579.976029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", + "developer": "agentlans", + "inference_platform": "unknown", + "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395062877609188 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35481032861183426 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32104166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1809341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json new file mode 100644 index 000000000..0026724ef --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish-Instruct/1762652579.9768262", + "retrieved_timestamp": "1762652579.976827", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-Daredevilish-Instruct", + "developer": "agentlans", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-Daredevilish-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7925969760236173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235442557198345 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3877160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json b/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json new file mode 100644 index 000000000..f39406301 --- /dev/null +++ b/data/hfopenllm_v2/agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/1762652579.9778361", + "retrieved_timestamp": "1762652579.977837", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", + "developer": "agentlans", + "inference_platform": "unknown", + "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2948831323111566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3311726760218689 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16082114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json b/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json new file mode 100644 index 000000000..489d7fd08 --- /dev/null +++ b/data/hfopenllm_v2/ai21labs/Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ai21labs_Jamba-v0.1/1762652579.978585", + "retrieved_timestamp": "1762652579.978585", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ai21labs/Jamba-v0.1", + "developer": "ai21labs", + "inference_platform": "unknown", + "id": "ai21labs/Jamba-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20255920956395698 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36022602451645724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35902083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916888297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "JambaForCausalLM", + "params_billions": 51.57 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json b/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json new file mode 100644 index 000000000..35ff3e5b0 --- /dev/null +++ b/data/hfopenllm_v2/ai4bharat/Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ai4bharat_Airavata/1762652579.978861", + "retrieved_timestamp": "1762652579.978862", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ai4bharat/Airavata", + "developer": "ai4bharat", + "inference_platform": "unknown", + "id": "ai4bharat/Airavata" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05585402288150995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36276862514633795 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3762916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1634807180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.87 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json b/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json new file mode 100644 index 000000000..ff491bec3 --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aixonlab_Aether-12b/1762652579.979132", + "retrieved_timestamp": "1762652579.979133", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aixonlab/Aether-12b", + "developer": "aixonlab", + "inference_platform": "unknown", + "id": "aixonlab/Aether-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23468286369056326 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5179400750435481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38286458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3410073138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json b/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json new file mode 100644 index 000000000..9ea3b7a6c --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aixonlab_Grey-12b/1762652579.979384", + "retrieved_timestamp": "1762652579.9793851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aixonlab/Grey-12b", + "developer": "aixonlab", + "inference_platform": "unknown", + "id": "aixonlab/Grey-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679938119744496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5698957505959833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4516354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779089095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json b/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json new file mode 100644 index 000000000..bff3ca2b0 --- /dev/null +++ b/data/hfopenllm_v2/aixonlab/Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aixonlab_Zara-14b-v1.2/1762652579.979647", + "retrieved_timestamp": "1762652579.979647", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aixonlab/Zara-14b-v1.2", + "developer": "aixonlab", + "inference_platform": "unknown", + "id": "aixonlab/Zara-14b-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6197400674654362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6405368457456163 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46747916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5263464095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json b/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json new file mode 100644 index 000000000..c2dc37ec6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/1-800-LLMs/Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi/1762652579.467683", + "retrieved_timestamp": "1762652579.4676852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1-800-LLMs/Qwen-2.5-14B-Hindi", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "1-800-LLMs/Qwen-2.5-14B-Hindi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.582570911847232 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6523901531956199 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3330815709969788 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262632978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json b/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json new file mode 100644 index 000000000..4b46bbafc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/1024m/QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1024m_QWEN-14B-B100/1762652579.468843", + "retrieved_timestamp": "1762652579.4688451", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1024m/QWEN-14B-B100", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "1024m/QWEN-14B-B100" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7762104549262623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.653271132679638 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5438066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5178690159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json b/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json new file mode 100644 index 000000000..51d57e54d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Aashraf995/Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aashraf995_Qwen-Evo-7B/1762652579.4765608", + "retrieved_timestamp": "1762652579.476562", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aashraf995/Qwen-Evo-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Aashraf995/Qwen-Evo-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4757343847657549 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5709361538590277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31419939577039274 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4541458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44622672872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json b/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json new file mode 100644 index 000000000..953f8a0fc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Aashraf995/QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aashraf995_QwenStock-14B/1762652579.476816", + "retrieved_timestamp": "1762652579.476817", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aashraf995/QwenStock-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Aashraf995/QwenStock-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5008632650256873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6550130348108012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4792604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5382313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json b/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json new file mode 100644 index 000000000..863c018c7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Alsebay/Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Alsebay_Qwen2.5-7B-test-novelist/1762652579.479883", + "retrieved_timestamp": "1762652579.4798841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Alsebay/Qwen2.5-7B-test-novelist", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Alsebay/Qwen2.5-7B-test-novelist" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5351600420218354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515121518446605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348942598187311 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47488541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3865525265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json b/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json new file mode 100644 index 000000000..30f9c1793 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Aryanne/QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aryanne_QwentileSwap/1762652579.4827101", + "retrieved_timestamp": "1762652579.482711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aryanne/QwentileSwap", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Aryanne/QwentileSwap" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7378422585406721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7008370136278447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42220543806646527 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4640416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5945811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json new file mode 100644 index 000000000..ea911d3f0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483878", + "retrieved_timestamp": "1762652579.4838789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45105431366551857 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42746984992662185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36228124999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28058510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json new file mode 100644 index 000000000..1203840e5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/AtAndDev/Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483521", + "retrieved_timestamp": "1762652579.483522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4605214165081982 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42577470857933336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3636458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28116688829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json b/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json new file mode 100644 index 000000000..f5b275fdb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.508495", + "retrieved_timestamp": "1762652579.5084958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8239958864701216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6370093752306357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4979222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json b/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json new file mode 100644 index 000000000..aefa35b7c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/1762652579.508758", + "retrieved_timestamp": "1762652579.508759", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7564019025075688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5402085849577634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.493202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40330208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4341755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json new file mode 100644 index 000000000..b282f2f20 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509247", + "retrieved_timestamp": "1762652579.509248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8206237228331937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.692924708291253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5944108761329305 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42072916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5720578457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json new file mode 100644 index 000000000..d9b653064 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.509461", + "retrieved_timestamp": "1762652579.509462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8175762532303177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6335891556421077 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4910239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json b/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json new file mode 100644 index 000000000..ef98e5850 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509675", + "retrieved_timestamp": "1762652579.509676", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8328136012446974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6955174427138592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5853474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43139583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5684840425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json new file mode 100644 index 000000000..a5fa40c3c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge2/1762652579.511093", + "retrieved_timestamp": "1762652579.511094", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CoolSpring/Qwen2-0.5B-Abyme-merge2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2021846478454944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29942723009138733 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3687291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14893617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json new file mode 100644 index 000000000..d1dfaaaee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge3/1762652579.51142", + "retrieved_timestamp": "1762652579.511421", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CoolSpring/Qwen2-0.5B-Abyme-merge3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23860468002677343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30031404525933675 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35009375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15001662234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json new file mode 100644 index 000000000..52bff1e77 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CoolSpring/Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme/1762652579.5106628", + "retrieved_timestamp": "1762652579.510665", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CoolSpring/Qwen2-0.5B-Abyme", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CoolSpring/Qwen2-0.5B-Abyme" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19151850423542865 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2861834296481826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13331117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json new file mode 100644 index 000000000..11ab78895 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Broca/1762652579.5150259", + "retrieved_timestamp": "1762652579.5150259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Broca", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Broca" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.560414145578177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6527145981540362 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47665625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364029255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json new file mode 100644 index 000000000..82a610c8f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-BrocaV9/1762652579.515307", + "retrieved_timestamp": "1762652579.5153081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-BrocaV9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-BrocaV9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6762933460994606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6391383585238984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json new file mode 100644 index 000000000..e30b9b316 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav3/1762652579.5155342", + "retrieved_timestamp": "1762652579.515535", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Brocav3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Brocav3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6951776841004091 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6452353476182755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38746223564954685 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4756354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531748670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json new file mode 100644 index 000000000..04c33b1bb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav6/1762652579.515748", + "retrieved_timestamp": "1762652579.5157492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Brocav6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Brocav6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6995239298394925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6388835266626555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38746223564954685 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47420833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5319148936170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json new file mode 100644 index 000000000..01716e63e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav7/1762652579.5159612", + "retrieved_timestamp": "1762652579.5159621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Brocav7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Brocav7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6723715297632504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6444026981327182 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47960416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5257646276595744 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json new file mode 100644 index 000000000..9fdbeaac2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emerged/1762652579.516177", + "retrieved_timestamp": "1762652579.516178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Emerged", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Emerged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7000237148543642 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6260033680703311 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46909375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5186170212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json new file mode 100644 index 000000000..113bc6908 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emergedv3/1762652579.516385", + "retrieved_timestamp": "1762652579.516386", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Emergedv3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Emergedv3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6388493641316153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6190728411056029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173703457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json new file mode 100644 index 000000000..57e08c13f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-FinalMerge/1762652579.516642", + "retrieved_timestamp": "1762652579.516643", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-FinalMerge", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-FinalMerge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48909781601705693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5714945310011449 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43790625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4574468085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json new file mode 100644 index 000000000..be5b29aaf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyper/1762652579.516851", + "retrieved_timestamp": "1762652579.516851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Hyper", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Hyper" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5391317260424563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6507453346766106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48983333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374002659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json new file mode 100644 index 000000000..27f04b8e8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-HyperMarck-dl/1762652579.5170581", + "retrieved_timestamp": "1762652579.517059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-HyperMarck-dl", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-HyperMarck-dl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6650276821057017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6096480033153927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5090591755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json new file mode 100644 index 000000000..bc6ba41ad --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv3/1762652579.517266", + "retrieved_timestamp": "1762652579.517267", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Hyperionv3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Hyperionv3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6836371937570092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6522165609411941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009063444108764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37080536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5339926861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json new file mode 100644 index 000000000..ff7c00a8f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv4/1762652579.517484", + "retrieved_timestamp": "1762652579.517484", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Hyperionv4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Hyperionv4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415796752616391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6471791978856551 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48319791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364029255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json new file mode 100644 index 000000000..9d9d6d63e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv5/1762652579.517704", + "retrieved_timestamp": "1762652579.517704", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Hyperionv5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Hyperionv5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6729211824625327 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.644265785086055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3821752265861027 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4795416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301695478723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json new file mode 100644 index 000000000..529ec4744 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MegaMerge-pt2/1762652579.517905", + "retrieved_timestamp": "1762652579.517906", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-MegaMerge-pt2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.568307645935008 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6577703330510146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3995468277945619 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.472875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420545212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json new file mode 100644 index 000000000..e9c57e443 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MergeStock/1762652579.518343", + "retrieved_timestamp": "1762652579.518346", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-MergeStock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-MergeStock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5685326046002386 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6579336391923106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41465256797583083 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4676354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json new file mode 100644 index 000000000..9d854e992 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-ReasoningMerge/1762652579.518682", + "retrieved_timestamp": "1762652579.518684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-ReasoningMerge", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-ReasoningMerge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46054690443578594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6578226399295218 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5165937500000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5344913563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json new file mode 100644 index 000000000..b7a0723ea --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Ultimav2/1762652579.519061", + "retrieved_timestamp": "1762652579.5190778", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Ultimav2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Ultimav2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5500228283177524 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6555027486976712 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json new file mode 100644 index 000000000..8c4503dbd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Unity/1762652579.519516", + "retrieved_timestamp": "1762652579.519517", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Unity", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Unity" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6738952645646883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6019955540977778 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4679479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507563164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json new file mode 100644 index 000000000..a9b8408a1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SLERP/1762652579.5203562", + "retrieved_timestamp": "1762652579.5203571", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Wernicke-SLERP", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5588904107767391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6440929009604598 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5093916223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.491 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json new file mode 100644 index 000000000..9e3cbd3e3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke/1762652579.519787", + "retrieved_timestamp": "1762652579.519788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Wernicke", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Wernicke" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5234699486252034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6568359662501574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5423869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json new file mode 100644 index 000000000..dc17ed671 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernickev3/1762652579.520611", + "retrieved_timestamp": "1762652579.520612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-Wernickev3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-Wernickev3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7048198779239085 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6184146992839421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4716666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515126329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json new file mode 100644 index 000000000..684385f0a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-partialmergept1/1762652579.5208588", + "retrieved_timestamp": "1762652579.52086", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwen2.5-14B-partialmergept1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwen2.5-14B-partialmergept1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.633728507028019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6151178406213536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45392749244712993 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47569791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207779255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json b/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json new file mode 100644 index 000000000..c1626f236 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_Qwenfinity-2.5-14B/1762652579.521086", + "retrieved_timestamp": "1762652579.521087", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/Qwenfinity-2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/Qwenfinity-2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4813794066410457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5655007271970033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45058333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4498005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json new file mode 100644 index 000000000..f68077ce1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMerge/1762652579.5218382", + "retrieved_timestamp": "1762652579.5218382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14B-EvolMerge", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14B-EvolMerge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5381576439403006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6572183434723883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48208333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json new file mode 100644 index 000000000..0a738c856 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMergev1/1762652579.5221288", + "retrieved_timestamp": "1762652579.52213", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14B-EvolMergev1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14B-EvolMergev1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5554683794554005 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6545547382762975 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46227083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539311835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json new file mode 100644 index 000000000..4c6ed82a3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-v5/1762652579.522369", + "retrieved_timestamp": "1762652579.522369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14B-v5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14B-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5919881470055011 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517093605796943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33081570996978854 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47141666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414727393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json new file mode 100644 index 000000000..b4de2b6ae --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B/1762652579.521544", + "retrieved_timestamp": "1762652579.521545", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5351600420218354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6505665291288972 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46661458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json new file mode 100644 index 000000000..b6ba23a9e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv1/1762652579.522592", + "retrieved_timestamp": "1762652579.522593", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14Bv1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14Bv1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6678003253589365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6344673727103446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47042708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531998005319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json new file mode 100644 index 000000000..a54806499 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv2/1762652579.5228019", + "retrieved_timestamp": "1762652579.5228028", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14Bv2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14Bv2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5785992278266112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6304512627108576 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47583081570996977 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4601041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5334109042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json new file mode 100644 index 000000000..40dab72e5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/CultriX/SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv3/1762652579.523057", + "retrieved_timestamp": "1762652579.523058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CultriX/SeQwence-14Bv3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "CultriX/SeQwence-14Bv3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5719047682371663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6302253848409948 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4624270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5334940159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json b/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json new file mode 100644 index 000000000..ff582637b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Danielbrdz/Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-R1-Qwen-1.5b/1762652579.5346482", + "retrieved_timestamp": "1762652579.5346491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-R1-Qwen-1.5b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24280132271262472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35872011187392944 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.354125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19090757978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json b/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json new file mode 100644 index 000000000..dc0d69f0e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/1762652579.53886", + "retrieved_timestamp": "1762652579.53886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34159474638403875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.580689592371853 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5536253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4623503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 25.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json new file mode 100644 index 000000000..ada50a45c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/1762652579.543009", + "retrieved_timestamp": "1762652579.543009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17832905579418165 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30326053640004424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3714583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 4.089 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json new file mode 100644 index 000000000..4d97c25e2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/1762652579.543224", + "retrieved_timestamp": "1762652579.543225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28351773294857646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35922718767499157 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24169184290030213 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38469791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1636469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.022 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json new file mode 100644 index 000000000..9aab308d4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/1762652579.543571", + "retrieved_timestamp": "1762652579.543573", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21067766858601844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32861776640637924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3404479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11220079787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 8.714 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json new file mode 100644 index 000000000..147df673e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita/1762652579.5521228", + "retrieved_timestamp": "1762652579.5521238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2-1.5B-Ita", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2-1.5B-Ita" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173495214918638 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39805765159128703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35037500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2771775265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json new file mode 100644 index 000000000..b91ff2f79 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v2/1762652579.552372", + "retrieved_timestamp": "1762652579.552373", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2-1.5B-Ita_v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2-1.5B-Ita_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49998891829235315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3953827803974795 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37018749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30319148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json new file mode 100644 index 000000000..0b93ed10a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v3/1762652579.552576", + "retrieved_timestamp": "1762652579.552577", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2-1.5B-Ita_v3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2-1.5B-Ita_v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4890479483326463 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3948478837209111 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37415624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017785904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json new file mode 100644 index 000000000..3846d0a27 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v5/1762652579.552789", + "retrieved_timestamp": "1762652579.55279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2-1.5B-Ita_v5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2-1.5B-Ita_v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4987400098405564 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40320443289745417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29429853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json new file mode 100644 index 000000000..d415525f2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/DeepMount00/Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v6/1762652579.553008", + "retrieved_timestamp": "1762652579.5530088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Qwen2-1.5B-Ita_v6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "DeepMount00/Qwen2-1.5B-Ita_v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29990425404593146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42486081646897506 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3754583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28715093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.497 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json b/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json new file mode 100644 index 000000000..12c33828d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/1762652579.5556989", + "retrieved_timestamp": "1762652579.5557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40376866713653103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34425676981862185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36628124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23221409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json new file mode 100644 index 000000000..2ca33d6a7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/1762652579.5920892", + "retrieved_timestamp": "1762652579.5920892", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4038429145777648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6090237540046592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4794479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5135472074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json new file mode 100644 index 000000000..24f5f73cf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/1762652579.59233", + "retrieved_timestamp": "1762652579.592331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6878837041272712 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7088012228048761 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4085570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47197916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.581283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json b/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json new file mode 100644 index 000000000..839c471d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Etherll/Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-7B-della-test/1762652579.614594", + "retrieved_timestamp": "1762652579.6145952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Qwen2.5-7B-della-test", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Etherll/Qwen2.5-7B-della-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7624968417133207 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5447331985391859 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48942598187311176 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40469791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4360871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json b/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json new file mode 100644 index 000000000..1b8712285 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/HPAI-BSC/Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/1762652579.6368651", + "retrieved_timestamp": "1762652579.636866", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4553506917201914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048995904321122 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4354222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json b/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json new file mode 100644 index 000000000..0beb66054 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/HeraiHench/DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/1762652579.6392472", + "retrieved_timestamp": "1762652579.639248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1869472998311148 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29134447696551025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.164 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json b/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json new file mode 100644 index 000000000..785cc16c5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/HeraiHench/Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HeraiHench_Double-Down-Qwen-Math-7B/1762652579.63955", + "retrieved_timestamp": "1762652579.639551", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HeraiHench/Double-Down-Qwen-Math-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "HeraiHench/Double-Down-Qwen-Math-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1669636564316015 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2844613514203868 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37365625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11120345744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json b/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json new file mode 100644 index 000000000..999966456 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/HeraiHench/Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HeraiHench_Marge-Qwen-Math-7B/1762652579.6397812", + "retrieved_timestamp": "1762652579.639782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HeraiHench/Marge-Qwen-Math-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "HeraiHench/Marge-Qwen-Math-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12622175826806206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3068846024368302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39390624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10555186170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json new file mode 100644 index 000000000..e76df15df --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-1epoch/1762652579.652392", + "retrieved_timestamp": "1762652579.6523929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-IRPO-1epoch", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25891301746033857 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31638216610052033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3286354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15001662234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json new file mode 100644 index 000000000..8d27e3a66 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-5epoch/1762652579.652645", + "retrieved_timestamp": "1762652579.652645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-IRPO-5epoch", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24867130325314607 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31891656220326015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32866666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1506815159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json new file mode 100644 index 000000000..1e4486fdd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-1epoch/1762652579.652854", + "retrieved_timestamp": "1762652579.6528552", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-eDPO-1epoch", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26233504878167707 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3180637583450692 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33269791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15525265957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json new file mode 100644 index 000000000..df8990171 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-5epoch/1762652579.653099", + "retrieved_timestamp": "1762652579.6531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen-0.5B-eDPO-5epoch", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24774708883540117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3096491823869347 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3326354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15226063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json new file mode 100644 index 000000000..08a468860 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/1762652579.680979", + "retrieved_timestamp": "1762652579.68098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2573892826589006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3279091360416723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31685416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16505984042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json new file mode 100644 index 000000000..d7d16f9d3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/1762652579.6812391", + "retrieved_timestamp": "1762652579.68124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3072481017034801 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32638442794247285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31564583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1624002659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json new file mode 100644 index 000000000..8930e1edd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/1762652579.68145", + "retrieved_timestamp": "1762652579.68145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25509093649294984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3242353334886223 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15741356382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json new file mode 100644 index 000000000..046746145 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/1762652579.681671", + "retrieved_timestamp": "1762652579.681671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26358395723347383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3198054258965539 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15857712765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json new file mode 100644 index 000000000..07f30a24f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/1762652579.681885", + "retrieved_timestamp": "1762652579.681886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23228478215579107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3254731912466387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31688541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16115359042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json new file mode 100644 index 000000000..2b8a74b30 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/1762652579.682102", + "retrieved_timestamp": "1762652579.682102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24137732328000816 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3314225693635648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15317486702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json new file mode 100644 index 000000000..3c08ebb38 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/1762652579.6823108", + "retrieved_timestamp": "1762652579.6823108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2677805999193252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3361518077587983 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15608377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json new file mode 100644 index 000000000..ea88ce1ed --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/1762652579.682508", + "retrieved_timestamp": "1762652579.682509", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25606501859510544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3231121828613069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31955208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1589095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json new file mode 100644 index 000000000..a9670a48e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/1762652579.682722", + "retrieved_timestamp": "1762652579.682723", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2639086512675257 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3257435380157632 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32085416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15866023936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json new file mode 100644 index 000000000..6f3b3f5c9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/1762652579.682945", + "retrieved_timestamp": "1762652579.682946", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2517686405404327 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213578303108222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31688541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1584940159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json new file mode 100644 index 000000000..26ec77b4e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/1762652579.683157", + "retrieved_timestamp": "1762652579.683158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24382527249919106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3266053460297184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31955208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15541888297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json new file mode 100644 index 000000000..34dc60ea4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/1762652579.6833699", + "retrieved_timestamp": "1762652579.683371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24654804806801509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32458923603023143 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31821875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15633311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json new file mode 100644 index 000000000..1875dd621 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/1762652579.683736", + "retrieved_timestamp": "1762652579.683738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2505695997730466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32614538576285174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15217752659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json new file mode 100644 index 000000000..307700f27 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/1762652579.684093", + "retrieved_timestamp": "1762652579.684094", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24567370133468086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179765517720094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3315208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15658244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json new file mode 100644 index 000000000..c4ba16fd4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/1762652579.684393", + "retrieved_timestamp": "1762652579.684394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24539887498503968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32157618750132033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1544215425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json new file mode 100644 index 000000000..75f084ba1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/1762652579.684617", + "retrieved_timestamp": "1762652579.684618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2341830786756916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3189252460411593 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15799534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json new file mode 100644 index 000000000..8f1441b72 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/1762652579.684837", + "retrieved_timestamp": "1762652579.684837", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23196008812173918 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3233548545784329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33688541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15425531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json new file mode 100644 index 000000000..bb3db6056 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/1762652579.6850612", + "retrieved_timestamp": "1762652579.685062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24175188499847072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3175499101875348 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15799534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json new file mode 100644 index 000000000..1f5f59c0f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/1762652579.6852841", + "retrieved_timestamp": "1762652579.685285", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24932069132124984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196623899087389 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15708111702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2Model", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json new file mode 100644 index 000000000..e3f25fc2b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/1762652579.685507", + "retrieved_timestamp": "1762652579.685508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2520434668900739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197552188491219 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15508643617021275 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json new file mode 100644 index 000000000..6e4a9a978 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1762652579.685728", + "retrieved_timestamp": "1762652579.685728", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25803867072700437 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3248229336342538 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15392287234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json new file mode 100644 index 000000000..4f3504076 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/1762652579.685941", + "retrieved_timestamp": "1762652579.685942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23196008812173918 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.326545450978746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27097315436241615 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33948958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15367353723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json new file mode 100644 index 000000000..389fd204b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/1762652579.686151", + "retrieved_timestamp": "1762652579.686152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2487710386219675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3272739110084265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15309175531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json new file mode 100644 index 000000000..f6c5e30ce --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/1762652579.686357", + "retrieved_timestamp": "1762652579.686357", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25236816092412573 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3129690310926447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32885416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15641622340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json new file mode 100644 index 000000000..6050cbf10 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1762652579.686578", + "retrieved_timestamp": "1762652579.686579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2513940788219702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.322095658026178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15383976063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json new file mode 100644 index 000000000..45617c92e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/1762652579.686833", + "retrieved_timestamp": "1762652579.6868339", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24567370133468086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3180087717709833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15724734042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json new file mode 100644 index 000000000..cbf51fc8d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/1762652579.6870458", + "retrieved_timestamp": "1762652579.687047", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26363382491788456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31806866682195567 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3235208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15741356382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json new file mode 100644 index 000000000..41b7c5585 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/1762652579.6873431", + "retrieved_timestamp": "1762652579.687347", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24829674153468353 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3174312444218736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1558344414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json new file mode 100644 index 000000000..06bfbe1e5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/1762652579.687733", + "retrieved_timestamp": "1762652579.687735", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2517686405404327 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3218020653711833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32348958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json new file mode 100644 index 000000000..cf6e4913a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/1762652579.6880698", + "retrieved_timestamp": "1762652579.688079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25361706937592254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3234331515135053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32355208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15965757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json new file mode 100644 index 000000000..9495aa32f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/1762652579.688372", + "retrieved_timestamp": "1762652579.688373", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24479935460134664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32395300683134437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32485416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15866023936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json new file mode 100644 index 000000000..baa747480 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/1762652579.688585", + "retrieved_timestamp": "1762652579.688586", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25046986440422525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.322699453909483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3209166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1589095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json new file mode 100644 index 000000000..66e9a85bb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/1762652579.6888041", + "retrieved_timestamp": "1762652579.688805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24719743613611883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.325505796038594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32079166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15866023936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json new file mode 100644 index 000000000..d6fc40f26 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/1762652579.689013", + "retrieved_timestamp": "1762652579.689014", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24165214962964932 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255889369754366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32745833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json new file mode 100644 index 000000000..c1c7bef92 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1762652579.689225", + "retrieved_timestamp": "1762652579.689225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2526928549581776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32354099176995715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32348958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15799534574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json new file mode 100644 index 000000000..0efefab1c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/1762652579.68944", + "retrieved_timestamp": "1762652579.689441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26685638550158025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313735254746672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3168229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16339760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json new file mode 100644 index 000000000..05b734857 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/1762652579.689661", + "retrieved_timestamp": "1762652579.689662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.270228549138508 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3299802970903615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32079166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1634807180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json new file mode 100644 index 000000000..68cc1ff0e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/1762652579.689882", + "retrieved_timestamp": "1762652579.689883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24802191518504235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33086196042215565 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3208229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16489361702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json new file mode 100644 index 000000000..4692c9c69 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/1762652579.690102", + "retrieved_timestamp": "1762652579.690103", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26223531341285566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3281993681712964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.322125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16339760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json new file mode 100644 index 000000000..4e8e153d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/1762652579.690311", + "retrieved_timestamp": "1762652579.690312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2608611816646498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32980236442597805 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31679166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1651429521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json new file mode 100644 index 000000000..4918c73ed --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/1762652579.69052", + "retrieved_timestamp": "1762652579.690521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2930347034756668 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3219547893625387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3115833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1590757978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json new file mode 100644 index 000000000..33440eb6c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/1762652579.690735", + "retrieved_timestamp": "1762652579.690736", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28813880503730105 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32553831509236264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31024999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15816156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json new file mode 100644 index 000000000..bd63ff5d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/1762652579.690953", + "retrieved_timestamp": "1762652579.690954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2887383254209941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3237016212336586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16090425531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json new file mode 100644 index 000000000..400459f84 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/1762652579.691165", + "retrieved_timestamp": "1762652579.691166", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2905368865720732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3254390641560331 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3129166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15741356382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json new file mode 100644 index 000000000..ca90d0f7e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/1762652579.691372", + "retrieved_timestamp": "1762652579.691373", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2904870188876625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32381698216947513 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30894791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15915890957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json new file mode 100644 index 000000000..9d7e82cfd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/1762652579.691587", + "retrieved_timestamp": "1762652579.691587", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23925406809487715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3244192088381941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1573304521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json new file mode 100644 index 000000000..78cb2b0df --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/1762652579.691787", + "retrieved_timestamp": "1762652579.691788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24747226248576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32090616030928304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1566655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json new file mode 100644 index 000000000..dc4847cab --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/1762652579.691988", + "retrieved_timestamp": "1762652579.691989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.232135179102559 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32779679775418075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14960106382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json new file mode 100644 index 000000000..3aa5ea362 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/1762652579.6921952", + "retrieved_timestamp": "1762652579.6921952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2541667220752049 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3253117533747236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.318125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16090425531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json new file mode 100644 index 000000000..dc6b22227 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/1762652579.6924422", + "retrieved_timestamp": "1762652579.692443", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.273875539125077 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3245102552473828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3089166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15965757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json b/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json new file mode 100644 index 000000000..62a54f4dc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/JungZoona/T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/1762652579.697056", + "retrieved_timestamp": "1762652579.697057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.732396707403024 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7585971930826706 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2862537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41694630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5911041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5884308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json b/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json new file mode 100644 index 000000000..e8a7e939d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Junhoee/Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Junhoee_Qwen-Megumin/1762652579.69731", + "retrieved_timestamp": "1762652579.697311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Junhoee/Qwen-Megumin", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Junhoee/Qwen-Megumin" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7141118897857683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.528526812457251 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41988031914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 15.231 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json b/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json new file mode 100644 index 000000000..358a02790 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/KingNish/Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_Qwen2.5-0.5b-Test-ft/1762652579.699473", + "retrieved_timestamp": "1762652579.699473", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/Qwen2.5-0.5b-Test-ft", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "KingNish/Qwen2.5-0.5b-Test-ft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26708134416681073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3231533857529747 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16888297872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json new file mode 100644 index 000000000..8cdcbe08a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.1/1762652579.700618", + "retrieved_timestamp": "1762652579.700619", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/qwen-1b-continued-v2.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "KingNish/qwen-1b-continued-v2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11268323603594019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30416583041069006 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41539583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1278257978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json new file mode 100644 index 000000000..2053439a2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.2/1762652579.7008262", + "retrieved_timestamp": "1762652579.700827", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/qwen-1b-continued-v2.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "KingNish/qwen-1b-continued-v2.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14125963554479892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30586579449667844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35130208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1262466755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json new file mode 100644 index 000000000..71b1da5f0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2/1762652579.7004201", + "retrieved_timestamp": "1762652579.700421", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/qwen-1b-continued-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "KingNish/qwen-1b-continued-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578711153073844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31194932022650246 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33927083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11926529255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json new file mode 100644 index 000000000..0338a3807 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/KingNish/qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued/1762652579.700214", + "retrieved_timestamp": "1762652579.700215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/qwen-1b-continued", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "KingNish/qwen-1b-continued" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12547263483113694 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29909543894796364 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38587499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1260804521276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json b/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json new file mode 100644 index 000000000..1a3edf10e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/1762652579.703295", + "retrieved_timestamp": "1762652579.703295", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4210295349672203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5601947823443537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726586102719033 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4776770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4363364361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json b/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json new file mode 100644 index 000000000..cca382b4f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lambent/qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/1762652579.707211", + "retrieved_timestamp": "1762652579.707212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47938137475232384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6458988582965893 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47700000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538813164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json b/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json new file mode 100644 index 000000000..e23f36edd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/LenguajeNaturalAI/leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/1762652579.713998", + "retrieved_timestamp": "1762652579.713999", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22211842356059697 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36835590195612017 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3749895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18799867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json new file mode 100644 index 000000000..f29818327 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/1762652579.736984", + "retrieved_timestamp": "1762652579.7369852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7048697456083193 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6478481476573447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4161631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5393949468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json new file mode 100644 index 000000000..2b2e393a3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/1762652579.737248", + "retrieved_timestamp": "1762652579.7372491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6943033373670748 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6419880364363972 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3466767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.476875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5251828457446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json new file mode 100644 index 000000000..1fabf9deb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/1762652579.737468", + "retrieved_timestamp": "1762652579.737469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7485084021507378 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6466679318879384 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4473020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5140458776595744 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json new file mode 100644 index 000000000..d5494427f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/1762652579.7379", + "retrieved_timestamp": "1762652579.7379", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46634152936430895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6214839063250638 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33157099697885195 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49373958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204454787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json new file mode 100644 index 000000000..4a0184237 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/1762652579.737686", + "retrieved_timestamp": "1762652579.737687", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.704320092909037 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6457646219275207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47678125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392287234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json new file mode 100644 index 000000000..f7a55eb21 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/1762652579.738374", + "retrieved_timestamp": "1762652579.7383769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.693054428915278 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6422587980411637 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48881250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5276761968085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json new file mode 100644 index 000000000..039af056d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/1762652579.738115", + "retrieved_timestamp": "1762652579.738116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6793906833867471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.653127892154805 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4833854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375664893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json new file mode 100644 index 000000000..881b5f514 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/1762652579.738977", + "retrieved_timestamp": "1762652579.7389781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5928624937388352 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6451310724242122 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36555891238670696 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47696875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290059840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json new file mode 100644 index 000000000..f55560c5a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/1762652579.7392142", + "retrieved_timestamp": "1762652579.7392151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5919382793210903 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6457173605698173 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070996978851964 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49532291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json new file mode 100644 index 000000000..17fbc40a7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/1762652579.7395148", + "retrieved_timestamp": "1762652579.739517", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7874761189200211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6482757721443902 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.524185505319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json new file mode 100644 index 000000000..f4a9cfa1c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/1762652579.739795", + "retrieved_timestamp": "1762652579.739796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7027963581075989 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6565626437486437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4911979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5323304521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json new file mode 100644 index 000000000..88ff0231d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/1762652579.74003", + "retrieved_timestamp": "1762652579.740031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7993413032974729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6483097746745584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370090634441088 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43282291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199468085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json new file mode 100644 index 000000000..54d8a94ad --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/1762652579.738731", + "retrieved_timestamp": "1762652579.738732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7874761189200211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6419472828128271 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5558912386706949 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43936458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206117021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json new file mode 100644 index 000000000..3ed5139c4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/1762652579.740509", + "retrieved_timestamp": "1762652579.74051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6513639365771708 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6570671029574323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41842900302114805 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4819583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412234042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json new file mode 100644 index 000000000..3abcf273a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/1762652579.74074", + "retrieved_timestamp": "1762652579.740741", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8002655177152178 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6554749578648256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5468277945619335 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43539583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250997340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json new file mode 100644 index 000000000..a19c197d4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/1762652579.74097", + "retrieved_timestamp": "1762652579.74097", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7862272104682243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6537693501484436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43809375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5283410904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json new file mode 100644 index 000000000..d8cc77e41 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/1762652579.740272", + "retrieved_timestamp": "1762652579.740273", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523519816309614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6545588984302916 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43655589123867067 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4805625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.542220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json new file mode 100644 index 000000000..97958544d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/1762652579.741195", + "retrieved_timestamp": "1762652579.741195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6141947809589667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6592166466793806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.51215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5238530585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json b/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json new file mode 100644 index 000000000..ee067b692 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/1762652579.747411", + "retrieved_timestamp": "1762652579.747412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794961812435449 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42301343044108936 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38673958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2922207446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json new file mode 100644 index 000000000..3795e6291 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/1762652579.750923", + "retrieved_timestamp": "1762652579.750923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29543278501043896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920071454890602 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23803191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json new file mode 100644 index 000000000..343294d93 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-72b/1762652579.75234", + "retrieved_timestamp": "1762652579.752341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-qwen2-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-qwen2-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8162774770941104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6965560971922596 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47321875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414727393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.699 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json new file mode 100644 index 000000000..c1ffe9181 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-7b/1762652579.752553", + "retrieved_timestamp": "1762652579.752554", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3816119008674761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045925887362795 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44369791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3692652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json new file mode 100644 index 000000000..0f2afb1ee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2.5-72b/1762652579.752765", + "retrieved_timestamp": "1762652579.752765", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8662360315075112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7261624327092416 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5913897280966768 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42984375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5619182180851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json new file mode 100644 index 000000000..59190bae2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-72b/1762652579.753872", + "retrieved_timestamp": "1762652579.753872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-qwen2-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-qwen2-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8008151704145002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6939595229335245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543467420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json new file mode 100644 index 000000000..0b96a7b14 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-7b/1762652579.7540858", + "retrieved_timestamp": "1762652579.754087", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35972996094806226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214913750127922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43582291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3898769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json new file mode 100644 index 000000000..b07673e58 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2.5-72b/1762652579.754294", + "retrieved_timestamp": "1762652579.754294", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8476763875406145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7276399007138082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.561751994680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.7 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json new file mode 100644 index 000000000..7bf9b81d7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-72b/1762652579.755723", + "retrieved_timestamp": "1762652579.755724", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-qwen2-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-qwen2-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3849840645044039 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6576306700720502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31722054380664655 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4112395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json new file mode 100644 index 000000000..fc36c6bb6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-7b/1762652579.755967", + "retrieved_timestamp": "1762652579.755968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3824862476008103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064049035932394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20694864048338368 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3611203457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json new file mode 100644 index 000000000..22fdb7227 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-qwen2-7b/1762652579.756743", + "retrieved_timestamp": "1762652579.756744", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.4-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.4-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32995452067181746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5101416326251771 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976894946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json new file mode 100644 index 000000000..f4eae59e4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.5-qwen2-7b/1762652579.757269", + "retrieved_timestamp": "1762652579.75727", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.5-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.5-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31449221399220734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4886561146965678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45646875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681848404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json new file mode 100644 index 000000000..91fe3fabb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.6-qwen2-7b/1762652579.7575328", + "retrieved_timestamp": "1762652579.757534", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.6-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.6-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3442676542684522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4930243946403894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2843959731543625 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4586145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731715425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json new file mode 100644 index 000000000..caf6ae623 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/MaziyarPanahi/calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.7-qwen2-7b/1762652579.757804", + "retrieved_timestamp": "1762652579.757805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.7-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.7-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3592301759331906 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4883170901309997 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48242708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3705119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json new file mode 100644 index 000000000..ac866ee38 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o1-7B-Qwen/1762652579.759999", + "retrieved_timestamp": "1762652579.76", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minami-su/Amara-o1-7B-Qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Minami-su/Amara-o1-7B-Qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7389914316236474 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199420077880453 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181268882175226 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40066666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4083277925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json new file mode 100644 index 000000000..574134d0b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Minami-su/Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o2-7B-Qwen/1762652579.760268", + "retrieved_timestamp": "1762652579.760268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Minami-su/Amara-o2-7B-Qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Minami-su/Amara-o2-7B-Qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7146615424850509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173432604435285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37809374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41647273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json b/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json new file mode 100644 index 000000000..9be688bb7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/1762652579.782197", + "retrieved_timestamp": "1762652579.782198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4011954946209391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4636652015725344 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1805135951661631 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43204166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2859873670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json b/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json new file mode 100644 index 000000000..26f772ddb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/1762652579.783665", + "retrieved_timestamp": "1762652579.783666", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17485715678843247 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2601595454586609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1124501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.157 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json b/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json new file mode 100644 index 000000000..ad31c0a69 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/PJMixers-Dev/Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/1762652579.811478", + "retrieved_timestamp": "1762652579.81148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7558023821238757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5398673461520839 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4285239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.808 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json b/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json new file mode 100644 index 000000000..4453b5060 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/1762652579.812139", + "retrieved_timestamp": "1762652579.812139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084819390328772 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47105662040096935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44785416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35106382978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json new file mode 100644 index 000000000..7ce95fc92 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B-Preview/1762652579.834909", + "retrieved_timestamp": "1762652579.83491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/QwQ-32B-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/QwQ-32B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035437084713006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6691381482252744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2818791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4109895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5678191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json new file mode 100644 index 000000000..c8c60953c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B/1762652579.8346298", + "retrieved_timestamp": "1762652579.834631", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/QwQ-32B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/QwQ-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39767372793077926 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29829653176003074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1608761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42063541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11959773936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json new file mode 100644 index 000000000..1a1b64ba3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B/1762652579.835391", + "retrieved_timestamp": "1762652579.835392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17056077873375977 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3153538659142558 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36162500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1307347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.62 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json new file mode 100644 index 000000000..5ad918369 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B/1762652579.835954", + "retrieved_timestamp": "1762652579.835955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-1.8B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-1.8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2154239639711521 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3476121558366305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36051041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18816489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json new file mode 100644 index 000000000..036719a30 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B/1762652579.836433", + "retrieved_timestamp": "1762652579.836434", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-110B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-110B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3421942667677318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6099964981780978 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44084375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5360704787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 111.21 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json new file mode 100644 index 000000000..c008ac930 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B/1762652579.836853", + "retrieved_timestamp": "1762652579.836853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2905368865720732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5080327493808331 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41864583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36436170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.167 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json new file mode 100644 index 000000000..19bcf18a2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B/1762652579.837265", + "retrieved_timestamp": "1762652579.837266", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-32B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.329729562006587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5715390555959325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028700906344411 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4277916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4499667553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.512 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json new file mode 100644 index 000000000..78e6eea27 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B/1762652579.837696", + "retrieved_timestamp": "1762652579.837697", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-4B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24447466056729478 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40538970296725463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3604479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24601063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.95 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json new file mode 100644 index 000000000..9eba02630 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B/1762652579.838115", + "retrieved_timestamp": "1762652579.8381162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684299879874289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4559896407693445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29163896276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.721 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json new file mode 100644 index 000000000..555eb3cfe --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B/1762652579.83854", + "retrieved_timestamp": "1762652579.83854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen1.5-MoE-A2.7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen1.5-MoE-A2.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.265982038768246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4113515433010766 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40134375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2777593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json new file mode 100644 index 000000000..4f3d5e4de --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B/1762652579.838974", + "retrieved_timestamp": "1762652579.838975", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18732186154957736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3239117424825444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37520833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17195811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json new file mode 100644 index 000000000..c0d00d0d6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B/1762652579.839384", + "retrieved_timestamp": "1762652579.839385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21132705665412216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35747931720577464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36581250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2551529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json new file mode 100644 index 000000000..af6120610 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B/1762652579.8398201", + "retrieved_timestamp": "1762652579.839821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-57B-A14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-57B-A14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31126965340851165 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5618204938684165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4916057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 57.409 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json new file mode 100644 index 000000000..f5d39d30b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B/1762652579.8402402", + "retrieved_timestamp": "1762652579.840241", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-72B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3823610243044012 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.661734029856643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47036458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5730551861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json new file mode 100644 index 000000000..57c3b27a6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B/1762652579.840696", + "retrieved_timestamp": "1762652579.840696", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3148667757106699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531531595001889 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4439166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41830119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json new file mode 100644 index 000000000..be65f27cf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-7B/1762652579.841364", + "retrieved_timestamp": "1762652579.841364", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2-Math-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2-Math-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2687048143370701 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.386954741074792 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1196808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json new file mode 100644 index 000000000..c93fe8471 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B/1762652579.841982", + "retrieved_timestamp": "1762652579.841983", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16271714606133947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32748148151196615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3433333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19057513297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json new file mode 100644 index 000000000..41839d9fb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B/1762652579.8426108", + "retrieved_timestamp": "1762652579.842612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26743041795768563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40779509451366147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35759375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28548869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json new file mode 100644 index 000000000..dc97cd178 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B/1762652579.843051", + "retrieved_timestamp": "1762652579.8430521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3694464022127954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.616051493531774 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29003021148036257 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4502395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5248503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json new file mode 100644 index 000000000..a5e6dabfc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B/1762652579.843701", + "retrieved_timestamp": "1762652579.843702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-32B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40766499554515356 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6770522448726507 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41191275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49783333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5805352393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json new file mode 100644 index 000000000..08e0a42e3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B/1762652579.8441322", + "retrieved_timestamp": "1762652579.8441331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-3B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2689541527591236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4612475341011634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3203125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json new file mode 100644 index 000000000..2529499f5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B/1762652579.844565", + "retrieved_timestamp": "1762652579.844566", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-72B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4137100670664947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6797320670694852 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39123867069486407 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4052013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.477125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5968251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json new file mode 100644 index 000000000..1de74221a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B/1762652579.8449879", + "retrieved_timestamp": "1762652579.8449888", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3374479713825982 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5416303767788616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25075528700906347 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4365026595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json new file mode 100644 index 000000000..db4ce938c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B/1762652579.8457868", + "retrieved_timestamp": "1762652579.845789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3472652561869174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5864860091741232 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4521276595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json new file mode 100644 index 000000000..ae46c4b9c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B/1762652579.846424", + "retrieved_timestamp": "1762652579.846425", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-32B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4363411304228336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.640395506550809 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30891238670694865 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302526595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json new file mode 100644 index 000000000..13a7a94b0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B/1762652579.846894", + "retrieved_timestamp": "1762652579.8468952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Coder-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Coder-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.344592348302504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48556405534214747 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3448541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json new file mode 100644 index 000000000..87c4a63fe --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Qwen/Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B/1762652579.8480499", + "retrieved_timestamp": "1762652579.848052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Qwen/Qwen2.5-Math-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Qwen/Qwen2.5-Math-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24599839536873275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4454639372840941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37809374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27177526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json b/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json new file mode 100644 index 000000000..85bbbdfcd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/RESMPDEV/EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/1762652579.848896", + "retrieved_timestamp": "1762652579.848896", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308172316121225 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3932411333682871 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3539375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27701130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json b/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json new file mode 100644 index 000000000..f547bc170 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/RESMPDEV/Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RESMPDEV_Qwen2-Wukong-0.5B/1762652579.849144", + "retrieved_timestamp": "1762652579.849144", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RESMPDEV/Qwen2-Wukong-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "RESMPDEV/Qwen2-Wukong-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1854235650296768 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308451428837168 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3524791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13272938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json new file mode 100644 index 000000000..8b3f7e8d8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Qwen2-1.5b/1762652579.852138", + "retrieved_timestamp": "1762652579.852139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-Coder-Qwen2-1.5b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-Coder-Qwen2-1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30142798884736943 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34747295666696026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21467752659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json new file mode 100644 index 000000000..75f4996fe --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.852611", + "retrieved_timestamp": "1762652579.852612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-LLM-Qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-LLM-Qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09324813716494457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2976924067792704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39409374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11569148936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json new file mode 100644 index 000000000..69b71649b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.8524", + "retrieved_timestamp": "1762652579.8524008", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-LLM-Qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-LLM-Qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09047549391170981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29852574011260374 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38476041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1157746010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json new file mode 100644 index 000000000..7fc8bd94e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/1762652579.852791", + "retrieved_timestamp": "1762652579.852791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08575468645416384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2929321328066677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1284906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json b/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json new file mode 100644 index 000000000..0a9bc0f5a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Rombo-Org/Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/1762652579.854495", + "retrieved_timestamp": "1762652579.854495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.748183708116686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399745025607596 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.506797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json b/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json new file mode 100644 index 000000000..1c770c304 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Sakalti/QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_QwenTest-7/1762652579.8585348", + "retrieved_timestamp": "1762652579.858536", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/QwenTest-7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Sakalti/QwenTest-7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16718861509683197 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3063209532879154 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34218750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12117686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.988 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json b/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json new file mode 100644 index 000000000..8d836bb50 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Sakalti/qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_qwen2.5-2.3B/1762652579.869403", + "retrieved_timestamp": "1762652579.8694038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/qwen2.5-2.3B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Sakalti/qwen2.5-2.3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12879493078365403 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2849449123234445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11727061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2Model", + "params_billions": 2.339 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json new file mode 100644 index 000000000..3431e55f2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_14B-1M/1762652579.8825831", + "retrieved_timestamp": "1762652579.882584", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Impish_QWEN_14B-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7867768631675067 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6282934814011238 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46146875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504404920212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json new file mode 100644 index 000000000..f87cd61b6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_7B-1M/1762652579.8828428", + "retrieved_timestamp": "1762652579.882844", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Impish_QWEN_7B-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6381744881359238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.537172912933626 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30891238670694865 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40739583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4265292553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json new file mode 100644 index 000000000..171ef4944 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/1762652579.883748", + "retrieved_timestamp": "1762652579.883749", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31579099012841483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6308941945507827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31797583081570996 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45166666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json new file mode 100644 index 000000000..6187f4b33 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/1762652579.883949", + "retrieved_timestamp": "1762652579.88395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3173147249298528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6308941945507827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31797583081570996 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45166666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json new file mode 100644 index 000000000..efe8de8bf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/1762652579.884166", + "retrieved_timestamp": "1762652579.884167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3789389929830627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5936792404117958 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3285498489425982 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5127160904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json b/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json new file mode 100644 index 000000000..3fb322cdd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/StelleX/Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/StelleX_Qwen2.5_Math_7B_Cot/1762652579.8928509", + "retrieved_timestamp": "1762652579.892852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "StelleX/Qwen2.5_Math_7B_Cot", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "StelleX/Qwen2.5_Math_7B_Cot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2142747908881767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312922433417096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39241666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.281000664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json b/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json new file mode 100644 index 000000000..3c58ab062 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/T145/qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_qwen-2.5-3B-merge-test/1762652579.908712", + "retrieved_timestamp": "1762652579.9087129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/qwen-2.5-3B-merge-test", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "T145/qwen-2.5-3B-merge-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5751018408932742 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4842488747720393 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3202416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40072916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json new file mode 100644 index 000000000..c272d8b61 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/1762652579.910362", + "retrieved_timestamp": "1762652579.910363", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.742413462944986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5404426673547671 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49924471299093653 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4321808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json new file mode 100644 index 000000000..807348eeb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/1762652579.910613", + "retrieved_timestamp": "1762652579.910613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44076273177391545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49023782785253694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34488541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37450132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json new file mode 100644 index 000000000..413f80bc6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/1762652579.910825", + "retrieved_timestamp": "1762652579.910826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6222378843690297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5089236146835355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40463541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34283577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json b/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json new file mode 100644 index 000000000..708353ab1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TIGER-Lab/Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TIGER-Lab_Qwen2.5-Math-7B-CFT/1762652579.911227", + "retrieved_timestamp": "1762652579.911228", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TIGER-Lab/Qwen2.5-Math-7B-CFT", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2776976200924658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46369414980230833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38866666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json new file mode 100644 index 000000000..f8ac7564e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-r-v0.3/1762652579.917092", + "retrieved_timestamp": "1762652579.917093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-r-v0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-r-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44550902715904905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6227124007872 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30060422960725075 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42776041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json new file mode 100644 index 000000000..684966803 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.1/1762652579.917331", + "retrieved_timestamp": "1762652579.917332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5621628390448454 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.643430074129922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2628398791540785 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41610416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520029920212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json new file mode 100644 index 000000000..d401bf54d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.2/1762652579.917543", + "retrieved_timestamp": "1762652579.917544", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5256929391791557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6386922464145662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43455208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5147107712765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json new file mode 100644 index 000000000..0083b6ac0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.3/1762652579.917758", + "retrieved_timestamp": "1762652579.917759", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-v0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4476322823441801 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6151533941210218 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31344410876132933 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4131875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5061502659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json new file mode 100644 index 000000000..4f63f0f88 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4.1/1762652579.918201", + "retrieved_timestamp": "1762652579.9182022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-v0.4.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-v0.4.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7359938297051822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6506533698399672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27794561933534745 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json new file mode 100644 index 000000000..1310d5eff --- /dev/null +++ b/data/hfopenllm_v2/alibaba/TheTsar1209/qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4/1762652579.917984", + "retrieved_timestamp": "1762652579.917985", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheTsar1209/qwen-carpmuscle-v0.4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "TheTsar1209/qwen-carpmuscle-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7202068289915202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6453667027727318 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45160416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5143783244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json b/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json new file mode 100644 index 000000000..8a390e587 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Triangle104/DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Qwen-7B-RP/1762652579.923616", + "retrieved_timestamp": "1762652579.923616", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DSR1-Distill-Qwen-7B-RP", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Triangle104/DSR1-Distill-Qwen-7B-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36092900171544834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4326490703099772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40454166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30277593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json b/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json new file mode 100644 index 000000000..67dad6161 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Weyaxi/Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v7-Qwen2-7B/1762652579.949607", + "retrieved_timestamp": "1762652579.949609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Einstein-v7-Qwen2-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Weyaxi/Einstein-v7-Qwen2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4099633417111043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5161472249498397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43997916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095744680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json new file mode 100644 index 000000000..09e23dbd3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/1762652579.953399", + "retrieved_timestamp": "1762652579.953399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7295741964653786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5469696828400438 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4412400265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json new file mode 100644 index 000000000..02a76ad5e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/1762652579.953881", + "retrieved_timestamp": "1762652579.9538822", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6701984068937087 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.537439126573433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47205438066465255 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4467083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4370844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json new file mode 100644 index 000000000..847acae1d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/1762652579.95366", + "retrieved_timestamp": "1762652579.953661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7640205765147586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5543342320067098 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4879154078549849 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44807291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json new file mode 100644 index 000000000..c22414580 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/1762652579.95415", + "retrieved_timestamp": "1762652579.954151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8112064876749248 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6431453053747279 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.533987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49285239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json new file mode 100644 index 000000000..4d7f76714 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/1762652579.954578", + "retrieved_timestamp": "1762652579.954578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7403899431286763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465437953400678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5075528700906344 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38069791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4393284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json new file mode 100644 index 000000000..1ec554961 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/1762652579.9543638", + "retrieved_timestamp": "1762652579.954365", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7689164749531243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489785469339065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4735649546827795 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4356715425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json new file mode 100644 index 000000000..65fe9e05c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/1762652579.954794", + "retrieved_timestamp": "1762652579.9547951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4073403015111017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40655813090204523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3383125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26412898936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json new file mode 100644 index 000000000..01495428c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/1762652579.955529", + "retrieved_timestamp": "1762652579.95553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8398327548681941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6448491305599157 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5354984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206948138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json new file mode 100644 index 000000000..6d02225a4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0505/1762652579.9557781", + "retrieved_timestamp": "1762652579.9557781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-0505", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5882912893345214 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6539239511887702 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47569791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370678191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json new file mode 100644 index 000000000..e88aa2137 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/1762652579.955989", + "retrieved_timestamp": "1762652579.955989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.594710922574325 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6552826977321495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44410876132930516 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47439583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5380651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json new file mode 100644 index 000000000..b27b7f7d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0805/1762652579.956195", + "retrieved_timestamp": "1762652579.956195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-0805", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5882912893345214 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6539239511887702 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47569791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370678191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json new file mode 100644 index 000000000..705d263ea --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/1762652579.956619", + "retrieved_timestamp": "1762652579.956619", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.595310442958018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6551321410649699 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4730625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371509308510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json new file mode 100644 index 000000000..483508307 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005/1762652579.9563992", + "retrieved_timestamp": "1762652579.9564002", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5971588717935079 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6542059787912534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.452416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47303125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5382313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json new file mode 100644 index 000000000..df0124e2d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/1762652579.957223", + "retrieved_timestamp": "1762652579.957223", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.594710922574325 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6552826977321495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44410876132930516 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47439583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5380651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json new file mode 100644 index 000000000..4bc255050 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.957045", + "retrieved_timestamp": "1762652579.957045", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7904737208384863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6405986391086301 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4180625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49443151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json new file mode 100644 index 000000000..d967872c5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.956832", + "retrieved_timestamp": "1762652579.956832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5898648918203699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6539973096042956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4509063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47439583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375664893617021 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json new file mode 100644 index 000000000..aba520b2f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-SCE/1762652579.957431", + "retrieved_timestamp": "1762652579.957431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5843694729983111 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6489486805510399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46148036253776437 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47042708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5380651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json new file mode 100644 index 000000000..ba7018f28 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/1762652579.957833", + "retrieved_timestamp": "1762652579.957834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8203488964835526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6515535751177631 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5019946808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json new file mode 100644 index 000000000..341061495 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/1762652579.958032", + "retrieved_timestamp": "1762652579.9580328", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8047868544351211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6338919627514907 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5166163141993958 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44345833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49675864361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json new file mode 100644 index 000000000..b09374121 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4/1762652579.9576309", + "retrieved_timestamp": "1762652579.957632", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8397828871837835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6490345839036636 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41152083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169547872340425 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json new file mode 100644 index 000000000..aea1891d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/1762652579.958441", + "retrieved_timestamp": "1762652579.958441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7771346693440072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6299023045601466 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5158610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42993750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5223570478723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json new file mode 100644 index 000000000..a4b69be40 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest/1762652579.95823", + "retrieved_timestamp": "1762652579.958231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.591063932587756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6656232526900528 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4418429003021148 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.469125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370678191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json new file mode 100644 index 000000000..6ab667c67 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-it-restore/1762652579.958646", + "retrieved_timestamp": "1762652579.958647", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-14B-it-restore", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-14B-it-restore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8209484168672456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6387730309916794 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370090634441088 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40872916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4900265957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json new file mode 100644 index 000000000..df9aa3d94 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-7B-it-restore/1762652579.958842", + "retrieved_timestamp": "1762652579.958842", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-7B-it-restore", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-7B-it-restore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7530796065550517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406524352251431 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40069791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42877327127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json new file mode 100644 index 000000000..8909b69ea --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/1762652579.9590368", + "retrieved_timestamp": "1762652579.959038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5335864395359867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6186663964199025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217522658610272 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4074966755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json new file mode 100644 index 000000000..00317316d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/1762652579.959567", + "retrieved_timestamp": "1762652579.9595678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070834275278483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6452083564140533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371509308510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json new file mode 100644 index 000000000..12f54f45d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/1762652579.959789", + "retrieved_timestamp": "1762652579.959789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8577928784513978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6359248665982408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4881150265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json new file mode 100644 index 000000000..4a8dbcfe4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/1762652579.959998", + "retrieved_timestamp": "1762652579.959999", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8364605912312664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.651497220848125 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392749244712991 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44342708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5203623670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json new file mode 100644 index 000000000..535074159 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YOYO-AI/ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B/1762652579.959276", + "retrieved_timestamp": "1762652579.9592772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.594111402190632 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6644460038734455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.411631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47569791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5350731382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json b/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json new file mode 100644 index 000000000..70cd583a8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/YoungPanda/qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/YoungPanda_qwenqwen/1762652579.964632", + "retrieved_timestamp": "1762652579.964633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "YoungPanda/qwenqwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "YoungPanda/qwenqwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12639684924888184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337898518087465 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34336458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 14.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json new file mode 100644 index 000000000..b25d9ffc3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/1762652579.9677062", + "retrieved_timestamp": "1762652579.9677062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6261597007052399 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5462236205548866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27341389728096677 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43267952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json new file mode 100644 index 000000000..48030382e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/1762652579.967964", + "retrieved_timestamp": "1762652579.967965", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5943862285402732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431374181474681 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4595416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4386635638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json new file mode 100644 index 000000000..e4eb8094e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/1762652579.96818", + "retrieved_timestamp": "1762652579.968181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7707649037886142 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5541319848156986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38368580060422963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43905208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4431515957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json new file mode 100644 index 000000000..9c55a92b8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/1762652579.968384", + "retrieved_timestamp": "1762652579.968385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7835044348994002 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5548068560095062 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43495833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4447307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json new file mode 100644 index 000000000..19945da3f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ZeroXClem/Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-Qandora-CySec/1762652579.968593", + "retrieved_timestamp": "1762652579.9685938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6773172958860268 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5490022663689288 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2930513595166163 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4484707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json b/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json new file mode 100644 index 000000000..ee6f1d259 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/abacusai/Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Liberated-Qwen1.5-14B/1762652579.9698281", + "retrieved_timestamp": "1762652579.9698281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Liberated-Qwen1.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "abacusai/Liberated-Qwen1.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36310212458499 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49480009174671863 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35123005319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json new file mode 100644 index 000000000..29956dc93 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.975151", + "retrieved_timestamp": "1762652579.975153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16853725891745014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31242688274884584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39629166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10663231382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json new file mode 100644 index 000000000..c5126a5c1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/adriszmar/QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.9747589", + "retrieved_timestamp": "1762652579.9747598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.174632198123202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3126379538396578 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40959375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10871010638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json new file mode 100644 index 000000000..857d3e4c4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-42B-AGI/1762652579.9983659", + "retrieved_timestamp": "1762652579.998367", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-42B-AGI", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-42B-AGI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19129354557019818 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2942104150907988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 42.516 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json new file mode 100644 index 000000000..7b705af05 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task2/1762652579.998622", + "retrieved_timestamp": "1762652579.998623", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-7B-task2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-7B-task2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45270327176336567 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5625940266685543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549848942598187 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43696874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4517121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json new file mode 100644 index 000000000..c4fa1605c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task3/1762652579.998833", + "retrieved_timestamp": "1762652579.998834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-7B-task3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-7B-task3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.512903540383959 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397623813486384 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26057401812688824 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43557291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45013297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json new file mode 100644 index 000000000..7e270f5d2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task4/1762652579.999042", + "retrieved_timestamp": "1762652579.999042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-7B-task4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-7B-task4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5005385709916355 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5583446038580263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43954166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45611702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json new file mode 100644 index 000000000..cfff1e9e2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task7/1762652579.999242", + "retrieved_timestamp": "1762652579.999243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-7B-task7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-7B-task7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42842325030917966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.555243179835915 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4133144946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json new file mode 100644 index 000000000..8944fce29 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task8/1762652579.9994612", + "retrieved_timestamp": "1762652579.999462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-7B-task8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-7B-task8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4645185884564068 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5524895381578828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45144791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44331781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json new file mode 100644 index 000000000..767c31531 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-slerp-14B/1762652579.999685", + "retrieved_timestamp": "1762652579.999686", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwen2.5-slerp-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwen2.5-slerp-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49282016161562425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.65124197415124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47439583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json new file mode 100644 index 000000000..4f2abde99 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp12-7B/1762652579.999902", + "retrieved_timestamp": "1762652579.999903", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenSlerp12-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenSlerp12-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5075577246151324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5556448443090559 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45947916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4460605053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json new file mode 100644 index 000000000..ea61568c4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp4-14B/1762652580.000124", + "retrieved_timestamp": "1762652580.000125", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenSlerp4-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenSlerp4-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6327544249258634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6483250205703057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693353474320242 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46496875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json new file mode 100644 index 000000000..f8881ba97 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp5-14B/1762652580.000389", + "retrieved_timestamp": "1762652580.0003898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenSlerp5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenSlerp5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7119387669162267 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6356573710010681 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4675416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json new file mode 100644 index 000000000..fa75519e2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp6-14B/1762652580.0006049", + "retrieved_timestamp": "1762652580.000606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenSlerp6-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenSlerp6-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6866846633598851 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6384454358065165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723564954682779 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46896875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405585106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json new file mode 100644 index 000000000..9f9cd83f5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock1-14B/1762652580.0008268", + "retrieved_timestamp": "1762652580.0008278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenStock1-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenStock1-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5634117474966422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6528491305599156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418051861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json new file mode 100644 index 000000000..e1b39207a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock2-14B/1762652580.001041", + "retrieved_timestamp": "1762652580.001042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenStock2-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenStock2-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5563427261887348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.656885010139055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47560416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405585106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json new file mode 100644 index 000000000..25a231beb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock3-14B/1762652580.0012438", + "retrieved_timestamp": "1762652580.001245", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/QwenStock3-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/QwenStock3-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5615134509767417 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6565322062808641 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776435045317221 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4755729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5428025265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json new file mode 100644 index 000000000..a3efa0164 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-14B/1762652580.001452", + "retrieved_timestamp": "1762652580.0014532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwenslerp2-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwenslerp2-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007136619724553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6554876216007552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4729375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5403091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json new file mode 100644 index 000000000..871456013 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-7B/1762652580.001649", + "retrieved_timestamp": "1762652580.0016499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwenslerp2-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwenslerp2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294396645345462 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5609127334788001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3421450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4356041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515458776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json new file mode 100644 index 000000000..2a087c508 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-14B/1762652580.001856", + "retrieved_timestamp": "1762652580.001856", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwenslerp3-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwenslerp3-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052349986923584 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6520835120117142 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44637462235649544 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46760416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json new file mode 100644 index 000000000..cc6eb877f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-7B/1762652580.0020611", + "retrieved_timestamp": "1762652580.002062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Qwenslerp3-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Qwenslerp3-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.501837347127843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5580160200086862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217522658610272 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45151041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45420545212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json b/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json new file mode 100644 index 000000000..ef86715ff --- /dev/null +++ b/data/hfopenllm_v2/alibaba/allknowingroger/Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/1762652580.002683", + "retrieved_timestamp": "1762652580.002683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1879213819332704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2969164076001621 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36333333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 42.516 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json new file mode 100644 index 000000000..0cbe3bfed --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/1762652580.03596", + "retrieved_timestamp": "1762652580.0359628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.261136008014291 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27743669901671336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35952083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11835106382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json new file mode 100644 index 000000000..21a23904e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/1762652580.0363572", + "retrieved_timestamp": "1762652580.0363579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30327641768285923 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2908444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33555208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11303191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json new file mode 100644 index 000000000..1365fab3d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/1762652580.036823", + "retrieved_timestamp": "1762652580.036824", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3751922676276723 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4926903187457697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5015105740181269 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4220625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42428523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json new file mode 100644 index 000000000..4e17e6d84 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/1762652580.03794", + "retrieved_timestamp": "1762652580.037941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554044380022784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337106084887115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4247604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15043218085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json new file mode 100644 index 000000000..869bb2c52 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/1762652580.037686", + "retrieved_timestamp": "1762652580.037687", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5221456845614081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3198581755956472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25075528700906347 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4526979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14835438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json new file mode 100644 index 000000000..ea8005b0c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1762652580.038195", + "retrieved_timestamp": "1762652580.038196", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5139274901705253 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3013444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44333333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12890625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json new file mode 100644 index 000000000..1ecb34763 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/1762652580.037415", + "retrieved_timestamp": "1762652580.037416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421791956453321 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3170339746824052 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14311835106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json new file mode 100644 index 000000000..f2256bd73 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/1762652580.0370848", + "retrieved_timestamp": "1762652580.037087", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5611632690151022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32828968244496226 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45542708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14469747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json new file mode 100644 index 000000000..4466383a2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/1762652580.038453", + "retrieved_timestamp": "1762652580.038454", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4290227706928727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.301225755504323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4553958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11294880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json new file mode 100644 index 000000000..843f11416 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B/1762652580.036597", + "retrieved_timestamp": "1762652580.036598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4171575863154209 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30329653176003074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4487916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json new file mode 100644 index 000000000..b12e26913 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/1762652580.03893", + "retrieved_timestamp": "1762652580.038931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266246891581005 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29017781029884354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38851041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11693816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json new file mode 100644 index 000000000..34eefcbdf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/1762652580.03921", + "retrieved_timestamp": "1762652580.039211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3654503384353515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2958444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38460416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11328125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json new file mode 100644 index 000000000..8a96518e5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/1762652580.039571", + "retrieved_timestamp": "1762652580.039572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921783091087204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2906778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38999999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1155252659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json new file mode 100644 index 000000000..e117f9ff4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B/1762652580.0386932", + "retrieved_timestamp": "1762652580.038694", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/DeepSeek-R1-Distill-Qwen-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/DeepSeek-R1-Distill-Qwen-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679938119744496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2886778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37666666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1141123670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json b/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json new file mode 100644 index 000000000..48a07267d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/braindao/Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B/1762652580.039853", + "retrieved_timestamp": "1762652580.039854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "braindao/Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.540854931581537 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5852660409288039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41235416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48836436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json b/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json new file mode 100644 index 000000000..df995b7d0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_CyberCore-Qwen-2.1-7B/1762652580.0426219", + "retrieved_timestamp": "1762652580.042623", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/CyberCore-Qwen-2.1-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/CyberCore-Qwen-2.1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5765757080103016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5572089082936126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35876132930513593 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4444813829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json b/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json new file mode 100644 index 000000000..16eea3303 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepQwen-3B-LCoT-SCE/1762652580.042877", + "retrieved_timestamp": "1762652580.042878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/DeepQwen-3B-LCoT-SCE", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/DeepQwen-3B-LCoT-SCE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489809261647983 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45123121380305237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json b/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json new file mode 100644 index 000000000..f99a6b123 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/1762652580.043099", + "retrieved_timestamp": "1762652580.043099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39010492160800014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3494110718041537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3663125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2508311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json new file mode 100644 index 000000000..8dd4a817c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.1/1762652580.04422", + "retrieved_timestamp": "1762652580.044221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/FwF-Qwen-7B-0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/FwF-Qwen-7B-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30045390674521383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5019272523147252 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39520833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4060837765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json new file mode 100644 index 000000000..2b01dac53 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.2/1762652580.044472", + "retrieved_timestamp": "1762652580.0444732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/FwF-Qwen-7B-0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/FwF-Qwen-7B-0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44790710869382133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5596406929346521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42178125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4382480053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json new file mode 100644 index 000000000..5f4e2ab61 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/1762652580.0542989", + "retrieved_timestamp": "1762652580.0542998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42080457630198986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4139878251775055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40181249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2103557180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json new file mode 100644 index 000000000..9cdf30ea7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/1762652580.054558", + "retrieved_timestamp": "1762652580.054559", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5695066867023941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361336083539997 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26435045317220546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4108958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40658244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json new file mode 100644 index 000000000..699244786 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/1762652580.054795", + "retrieved_timestamp": "1762652580.054796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7752862405085175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5452765042799131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48942598187311176 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41269791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4341755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json new file mode 100644 index 000000000..7ad5aeb50 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/1762652580.05501", + "retrieved_timestamp": "1762652580.055011", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45090471061228654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672461238794705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28316156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json new file mode 100644 index 000000000..f9c862b49 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Exp-Sce/1762652580.055233", + "retrieved_timestamp": "1762652580.055233", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Exp-Sce", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Exp-Sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.765169749597734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5505865059891896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3255287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44302083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42586436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json new file mode 100644 index 000000000..19496e84c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-R1-Stock/1762652580.055454", + "retrieved_timestamp": "1762652580.055455", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-R1-Stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-R1-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7573261169253137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5393363105747148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007552870090635 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3993645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429438164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json new file mode 100644 index 000000000..d55ec55db --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/1762652580.0556722", + "retrieved_timestamp": "1762652580.0556731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206219497599702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49203477801491813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4068020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json new file mode 100644 index 000000000..c319bd143 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7b-S1k/1762652580.055886", + "retrieved_timestamp": "1762652580.0558872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen-2.5-7b-S1k", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen-2.5-7b-S1k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7162351449708995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5562750208035135 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4780966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4071458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4382480053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json new file mode 100644 index 000000000..129f7c18b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-1.5B-Model-Stock/1762652580.0561001", + "retrieved_timestamp": "1762652580.056101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-1.5B-Model-Stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-1.5B-Model-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18292574812608325 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2873695911207613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11003989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.776 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json new file mode 100644 index 000000000..eb8637a39 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v2/1762652580.0565188", + "retrieved_timestamp": "1762652580.05652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-Model-Stock-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6490157227268093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46774789186946836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3867069486404834 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3269614361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json new file mode 100644 index 000000000..1a83383b0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/1762652580.056732", + "retrieved_timestamp": "1762652580.056733", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6480915083090644 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.473722298403459 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38972809667673713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json new file mode 100644 index 000000000..3e81e3a01 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/1762652580.05695", + "retrieved_timestamp": "1762652580.05695", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6353021095138676 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727417689283166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3293716755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json new file mode 100644 index 000000000..e0772b17d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/1762652580.0571678", + "retrieved_timestamp": "1762652580.057169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6380747527671025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48202557906199406 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39409374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3386801861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json new file mode 100644 index 000000000..e18648a7e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock/1762652580.056308", + "retrieved_timestamp": "1762652580.056309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-Model-Stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-Model-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6380747527671025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4712481909242632 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37990936555891236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249667553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.396 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json new file mode 100644 index 000000000..04cc00cc8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Mix/1762652580.057388", + "retrieved_timestamp": "1762652580.057389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-RP-Mix", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-RP-Mix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5720543712903984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4894378989397821 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42844791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37275598404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json new file mode 100644 index 000000000..6292da827 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker-V2/1762652580.057826", + "retrieved_timestamp": "1762652580.057826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6419965691033125 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46784408133522204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.398125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271276595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json new file mode 100644 index 000000000..282778bc6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker/1762652580.0576031", + "retrieved_timestamp": "1762652580.057604", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-3B-RP-Thinker", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-3B-RP-Thinker" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.589414974489909 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4164134011392067 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33534743202416917 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3287291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149933510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json new file mode 100644 index 000000000..7a74aea23 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-CyberRombos/1762652580.058041", + "retrieved_timestamp": "1762652580.058042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-CyberRombos", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-CyberRombos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.751830698103255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5464960546716063 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41254166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4390791223404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json new file mode 100644 index 000000000..ea0480ad6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Fuse-Exp/1762652580.0583198", + "retrieved_timestamp": "1762652580.058321", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-Fuse-Exp", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-Fuse-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5468501354184675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5108680600425207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31419939577039274 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45728125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3308676861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json new file mode 100644 index 000000000..ecd979093 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/1762652580.058998", + "retrieved_timestamp": "1762652580.058999", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21197644472222593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3479005166788895 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25755287009063443 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17794215425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json new file mode 100644 index 000000000..be0fc7e4e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-V0.1/1762652580.059214", + "retrieved_timestamp": "1762652580.059214", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-MixStock-V0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7673428724672757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5479100568012056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31722054380664655 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.441625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4256150265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json new file mode 100644 index 000000000..a5318692a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/1762652580.059437", + "retrieved_timestamp": "1762652580.059438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3726445830396681 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48221362910675625 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34715757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json new file mode 100644 index 000000000..9f10c73ee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/1762652580.059654", + "retrieved_timestamp": "1762652580.059655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3786641666334215 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41495531490332715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3568854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2687832446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json new file mode 100644 index 000000000..e142670a4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1762652580.060085", + "retrieved_timestamp": "1762652580.060086", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23081091503876383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3481907488085136 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719033232628399 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1768617021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json new file mode 100644 index 000000000..963d44875 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M/1762652580.059867", + "retrieved_timestamp": "1762652580.0598679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-RRP-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-RRP-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7481338404322753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.545239229980545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44826041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266123670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json new file mode 100644 index 000000000..79f113ef7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-ID/1762652580.0603101", + "retrieved_timestamp": "1762652580.0603101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-RRP-ID", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-RRP-ID" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.747259493698941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5479543512061099 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.486404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json new file mode 100644 index 000000000..de1beac6b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Sky-R1-Mini/1762652580.061045", + "retrieved_timestamp": "1762652580.0610461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23048622100471194 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3502939195575525 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3448229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12533244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json b/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json new file mode 100644 index 000000000..22a4354ed --- /dev/null +++ b/data/hfopenllm_v2/alibaba/bunnycore/QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QwenMosaic-7B/1762652580.061329", + "retrieved_timestamp": "1762652580.0613298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QwenMosaic-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "bunnycore/QwenMosaic-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5819215237791282 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5564132127895585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44410876132930516 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4163854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43101728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json new file mode 100644 index 000000000..fe5a50f4c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/1762652580.112457", + "retrieved_timestamp": "1762652580.112458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697136930012367 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31142229157184026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35545833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14128989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json new file mode 100644 index 000000000..9f7d4d866 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-72b/1762652580.114711", + "retrieved_timestamp": "1762652580.114712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6343778950961227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6296364939584073 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802114803625378 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45207291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547124335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json new file mode 100644 index 000000000..5e6e0f4ff --- /dev/null +++ b/data/hfopenllm_v2/alibaba/cognitivecomputations/dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-7b/1762652580.114933", + "retrieved_timestamp": "1762652580.114934", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3534599307614906 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48938263759195594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4050864361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json new file mode 100644 index 000000000..93d29e7a1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/1762652580.121964", + "retrieved_timestamp": "1762652580.1219652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34634104176917246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32409879947333436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36345833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11868351063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json new file mode 100644 index 000000000..e2cbf872e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/1762652580.122241", + "retrieved_timestamp": "1762652580.122248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816517950150047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5905573130283358 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4666722074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json new file mode 100644 index 000000000..637e2740b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/1762652580.12255", + "retrieved_timestamp": "1762652580.1225522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186314534324481 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41969150892898055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4526041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46866688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json new file mode 100644 index 000000000..f00702a37 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/1762652580.1228092", + "retrieved_timestamp": "1762652580.1228101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40376866713653103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34425676981862185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36628124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2321309840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json b/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json new file mode 100644 index 000000000..b70ac13d1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/dfurman/Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dfurman_Qwen2-72B-Orpo-v0.1/1762652580.125584", + "retrieved_timestamp": "1762652580.1255848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dfurman/Qwen2-72B-Orpo-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "dfurman/Qwen2-72B-Orpo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7879759039348928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6969024790545039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40558912386706947 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47842708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5454621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.699 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json new file mode 100644 index 000000000..bdfaf1c97 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT-Dare/1762652580.1400871", + "retrieved_timestamp": "1762652580.140088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/QwenQwen2.5-7B-IT-Dare", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397962708415814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5090634441087614 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4289394946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json new file mode 100644 index 000000000..4cfdd3627 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT/1762652580.1398232", + "retrieved_timestamp": "1762652580.1398232", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/QwenQwen2.5-7B-IT", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/QwenQwen2.5-7B-IT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.751830698103255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397962708415814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5090634441087614 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4289394946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json new file mode 100644 index 000000000..625dbebd0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.1/1762652580.140311", + "retrieved_timestamp": "1762652580.140312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/RQwen-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/RQwen-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7624968417133207 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6446435015804635 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4645015105740181 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41390625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5201961436170213 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json new file mode 100644 index 000000000..2670e8a91 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.2/1762652580.140525", + "retrieved_timestamp": "1762652580.140526", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/RQwen-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/RQwen-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7503568309862276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6426888858891955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515874335106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json b/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json new file mode 100644 index 000000000..f94dc3c97 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_coolqwen-3b-it/1762652580.140961", + "retrieved_timestamp": "1762652580.1409621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/coolqwen-3b-it", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/coolqwen-3b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6472670292601409 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.485089343991756 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41251041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3601230053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json new file mode 100644 index 000000000..65550aa5f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-duable4layers-it/1762652580.1428769", + "retrieved_timestamp": "1762652580.1428769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/frqwen2.5-from7b-duable4layers-it", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7728881589737453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5263561044354216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4509063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4165729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4126496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json new file mode 100644 index 000000000..cd5eca5a4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-it/1762652580.143308", + "retrieved_timestamp": "1762652580.143309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/frqwen2.5-from7b-it", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/frqwen2.5-from7b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6532123654126606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5142906815349029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4085729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976894946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 13.206 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json new file mode 100644 index 000000000..a8925d02e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-test-32b-it/1762652580.144918", + "retrieved_timestamp": "1762652580.1449192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/qwen2.5-test-32b-it", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/qwen2.5-test-32b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889499860370484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.708059329453303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5974320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4578125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5765458776595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json new file mode 100644 index 000000000..9698b20c3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/ehristoforu/qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-with-lora-think-3b-it/1762652580.1451252", + "retrieved_timestamp": "1762652580.1451259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/qwen2.5-with-lora-think-3b-it", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "ehristoforu/qwen2.5-with-lora-think-3b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5319374814381397 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4686847308109022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.236404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43095833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3402593085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json b/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json new file mode 100644 index 000000000..8067752eb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/freewheelin/free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/freewheelin_free-evo-qwen72b-v0.8-re/1762652580.161332", + "retrieved_timestamp": "1762652580.161333", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "freewheelin/free-evo-qwen72b-v0.8-re", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "freewheelin/free-evo-qwen72b-v0.8-re" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.533086654521115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6127477065378042 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4871666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4870345744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.288 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json new file mode 100644 index 000000000..a49c77f81 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_2500_qwen/1762652580.170526", + "retrieved_timestamp": "1762652580.170526", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/ifd_2500_qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/ifd_2500_qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33647388928044253 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42983047351897224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36146875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2921376329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json new file mode 100644 index 000000000..24a6d9d8a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_all_sample_2500_qwen/1762652580.170775", + "retrieved_timestamp": "1762652580.1707761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33757319467900726 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4019641175400575 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3561666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2888962765957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json new file mode 100644 index 000000000..7c36ac910 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_sample_2500_qwen/1762652580.170979", + "retrieved_timestamp": "1762652580.1709802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/ifd_new_correct_sample_2500_qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/ifd_new_correct_sample_2500_qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33974631754854895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41103125849665423 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3078859060402685 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3626770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.293218085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json new file mode 100644 index 000000000..48951a77c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_qwen_2500/1762652580.171179", + "retrieved_timestamp": "1762652580.17118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/ifd_new_qwen_2500", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/ifd_new_qwen_2500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.323959316834887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41598162527775745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3589583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29105718085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json new file mode 100644 index 000000000..ebfaec91a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen-2.5-1.5b-cherry/1762652580.1715672", + "retrieved_timestamp": "1762652580.1715689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/qwen-2.5-1.5b-cherry", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/qwen-2.5-1.5b-cherry" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28933784580468713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40357573315752204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.345625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29230385638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.772 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json new file mode 100644 index 000000000..fabac2360 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_2.5-1.5b-cherry_new/1762652580.171904", + "retrieved_timestamp": "1762652580.171905", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/qwen_2.5-1.5b-cherry_new", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/qwen_2.5-1.5b-cherry_new" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3120442647730245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149628386006759 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34959375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28939494680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json new file mode 100644 index 000000000..51da10fab --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_full_data_alpaca/1762652580.1721501", + "retrieved_timestamp": "1762652580.172151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/qwen_full_data_alpaca", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/qwen_full_data_alpaca" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3136178672588731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4229212208733662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40515625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28507313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json new file mode 100644 index 000000000..f1e005c53 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/godlikehhd/qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_ins_ans_2500/1762652580.172384", + "retrieved_timestamp": "1762652580.172385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/qwen_ins_ans_2500", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "godlikehhd/qwen_ins_ans_2500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2698041197356348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4073950292977672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3588645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28091755319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json new file mode 100644 index 000000000..7eb8c63e7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.1/1762652580.187419", + "retrieved_timestamp": "1762652580.18742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gz987/qwen2.5-7b-cabs-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "gz987/qwen2.5-7b-cabs-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7505817896514582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5481580818735207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.479607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.437625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4405751329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json new file mode 100644 index 000000000..14c53ba6c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.2/1762652580.18783", + "retrieved_timestamp": "1762652580.187832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gz987/qwen2.5-7b-cabs-v0.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "gz987/qwen2.5-7b-cabs-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7417640748768822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516262466675281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44286458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43974401595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json new file mode 100644 index 000000000..0f7cec49a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.3/1762652580.188173", + "retrieved_timestamp": "1762652580.188174", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gz987/qwen2.5-7b-cabs-v0.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "gz987/qwen2.5-7b-cabs-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7569515552068511 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5494465314719504 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.493202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44295833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4401595744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json new file mode 100644 index 000000000..a2cc4c572 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/gz987/qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.4/1762652580.188425", + "retrieved_timestamp": "1762652580.188426", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gz987/qwen2.5-7b-cabs-v0.4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "gz987/qwen2.5-7b-cabs-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7582503313430586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5524401094760039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48489425981873113 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44295833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json b/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json new file mode 100644 index 000000000..df645e815 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Deepseek-qwen-modelstock-2B/1762652580.1914759", + "retrieved_timestamp": "1762652580.191477", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Deepseek-qwen-modelstock-2B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/Deepseek-qwen-modelstock-2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21487431127186973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549242330959277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34745833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19107380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json b/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json new file mode 100644 index 000000000..8eea96b89 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Qwen2.5-HomerSlerp-7B/1762652580.1961112", + "retrieved_timestamp": "1762652580.1961112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Qwen2.5-HomerSlerp-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/Qwen2.5-HomerSlerp-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44878145542715553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5632506117591088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33157099697885195 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4383333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4548703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json new file mode 100644 index 000000000..870b138ee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenModelStock-1.8B/1762652580.196316", + "retrieved_timestamp": "1762652580.196316", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenModelStock-1.8B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenModelStock-1.8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263075306852484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41881762650909504 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2958776595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json new file mode 100644 index 000000000..28b5379c5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-14B/1762652580.1965241", + "retrieved_timestamp": "1762652580.196525", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7024716640735471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6491286917834284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38368580060422963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4634479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json new file mode 100644 index 000000000..091bbb337 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-3B/1762652580.1967301", + "retrieved_timestamp": "1762652580.1967309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp-3B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4333690164319561 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4892345530653528 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27492447129909364 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43166666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693484042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json new file mode 100644 index 000000000..9cdf5880f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-7B/1762652580.197109", + "retrieved_timestamp": "1762652580.19711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672912317096415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5636352508232924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441087613293053 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45088098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json new file mode 100644 index 000000000..c3cae08f9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-14B/1762652580.197355", + "retrieved_timestamp": "1762652580.197356", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp2-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp2-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7036707048409332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6492799322983842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48065625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json new file mode 100644 index 000000000..c9091ed86 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-3B/1762652580.197566", + "retrieved_timestamp": "1762652580.197566", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp2-3B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4280486885907171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4801760257099328 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26057401812688824 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4251875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3741688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json new file mode 100644 index 000000000..909fe07ae --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp3-14B/1762652580.197938", + "retrieved_timestamp": "1762652580.1979399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSlerp3-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSlerp3-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6632291209546226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6266526215170748 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43051359516616317 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48078125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262632978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json new file mode 100644 index 000000000..f6b52f755 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSparse-7B/1762652580.198252", + "retrieved_timestamp": "1762652580.198254", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenSparse-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenSparse-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10858632871891026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28956619468137906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35622916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11220079787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json new file mode 100644 index 000000000..31ab8f730 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-0.5B/1762652580.198598", + "retrieved_timestamp": "1762652580.1985989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenStock-0.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenStock-0.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20490742341431845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json new file mode 100644 index 000000000..ab39ec2ea --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-1.7B/1762652580.1988428", + "retrieved_timestamp": "1762652580.198844", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenStock-1.7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenStock-1.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32141163224688274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187550547805281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44121875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2954621010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json new file mode 100644 index 000000000..602e47ce0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/hotmailuser/QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock1-14B/1762652580.1990862", + "retrieved_timestamp": "1762652580.1990871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/QwenStock1-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "hotmailuser/QwenStock1-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6693240601603745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6502248812491821 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009063444108764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47811458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5416389627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json b/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json new file mode 100644 index 000000000..442eb0325 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/1762652580.200386", + "retrieved_timestamp": "1762652580.200386", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42112927033604175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34869240677927044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47006250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19148936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json b/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json new file mode 100644 index 000000000..c196f2d7a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jayasuryajsk/Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jayasuryajsk_Qwen2.5-3B-reasoner/1762652580.280263", + "retrieved_timestamp": "1762652580.280264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jayasuryajsk/Qwen2.5-3B-reasoner", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jayasuryajsk/Qwen2.5-3B-reasoner" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4159585455480348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46511772991620703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41229166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3482380319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json b/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json new file mode 100644 index 000000000..3ca4d79d6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeanmichela/o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeanmichela_o-distil-qwen/1762652580.280534", + "retrieved_timestamp": "1762652580.280535", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeanmichela/o-distil-qwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeanmichela/o-distil-qwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44823180272787316 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5900367438200601 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5339895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46575797872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json b/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json new file mode 100644 index 000000000..8df8381b8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jebish7/qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_qwen2.5-0.5B-IHA-Hin/1762652580.28294", + "retrieved_timestamp": "1762652580.28294", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/qwen2.5-0.5B-IHA-Hin", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jebish7/qwen2.5-0.5B-IHA-Hin" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14163419726326149 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29891753632624085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34748958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json new file mode 100644 index 000000000..bbbeea6d2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1762652580.283215", + "retrieved_timestamp": "1762652580.2832158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6135952605752737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421083753999172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47929166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4362533244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json new file mode 100644 index 000000000..c875f594a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-minperplexity-2/1762652580.28349", + "retrieved_timestamp": "1762652580.2834911", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-minperplexity-2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.509730847484674 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.552390586276348 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3013595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46245833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345910904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json new file mode 100644 index 000000000..188fc3baa --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/1762652580.2837172", + "retrieved_timestamp": "1762652580.2837179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6048274134851084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5469701834138724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48198958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4363364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json new file mode 100644 index 000000000..74c906439 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/1762652580.283937", + "retrieved_timestamp": "1762652580.283938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7695159953368174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.541762771903226 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47129909365558914 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4551145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4253656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json new file mode 100644 index 000000000..50ee1a5bb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/1762652580.2841558", + "retrieved_timestamp": "1762652580.284157", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6626296005709296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48640249867140106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38429166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3849734042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json new file mode 100644 index 000000000..2cc7daf84 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/1762652580.284375", + "retrieved_timestamp": "1762652580.284375", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49646715160219335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.494592979290867 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41724999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3968583776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json new file mode 100644 index 000000000..76032093d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/1762652580.284589", + "retrieved_timestamp": "1762652580.284589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49951462120506923 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5026055485090198 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41873958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4015957446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json new file mode 100644 index 000000000..05a4f1ee2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/1762652580.284807", + "retrieved_timestamp": "1762652580.284807", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6078748830879843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5467076263362468 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2809667673716012 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47138541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44190492021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json new file mode 100644 index 000000000..1d0e7896e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/1762652580.2850199", + "retrieved_timestamp": "1762652580.2850208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5650352176669016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5522599149696679 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2756797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49820833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44481382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json new file mode 100644 index 000000000..c8a4157ee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/1762652580.285239", + "retrieved_timestamp": "1762652580.285239", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4201551882338861 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5391718355132782 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48484375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42802526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json new file mode 100644 index 000000000..0b8cd3b99 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/1762652580.2854452", + "retrieved_timestamp": "1762652580.285446", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6255601803215468 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446899383425835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47671875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343417553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json new file mode 100644 index 000000000..eaf7786d9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.0/1762652580.285652", + "retrieved_timestamp": "1762652580.2856529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.0", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5331365222055258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5659918212629057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2862537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42776041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4566156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json new file mode 100644 index 000000000..af46d71bb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.1/1762652580.285865", + "retrieved_timestamp": "1762652580.285865", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4329445870290828 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5478077656573704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48081250000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4354222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json new file mode 100644 index 000000000..b35d6d1df --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.2/1762652580.286082", + "retrieved_timestamp": "1762652580.286083", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42025492360270744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5533340429711561 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2847432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46878125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json new file mode 100644 index 000000000..ebafcd423 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.3/1762652580.286303", + "retrieved_timestamp": "1762652580.286304", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4218540140161438 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5531852688351706 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104229607250755 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4700520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44697473404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json new file mode 100644 index 000000000..8554a40d0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.4/1762652580.286527", + "retrieved_timestamp": "1762652580.2865438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4545018329144448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5581962445576828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46220833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457280585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json new file mode 100644 index 000000000..b768d9265 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.5/1762652580.286995", + "retrieved_timestamp": "1762652580.286996", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/Qwen2.5-7B-olm-v1.5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4546514359676769 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5543943528577703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28172205438066467 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4539270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43991023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json b/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json new file mode 100644 index 000000000..cba002174 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/1762652580.2872581", + "retrieved_timestamp": "1762652580.2872589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37571643239936703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5582354546195324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4367519946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json b/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json new file mode 100644 index 000000000..3c6db091a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/1762652580.3057542", + "retrieved_timestamp": "1762652580.305755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6081497094376255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5549941776226351 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37613293051359514 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42772916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44639295212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json new file mode 100644 index 000000000..02c9067ae --- /dev/null +++ b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1/1762652580.310198", + "retrieved_timestamp": "1762652580.310199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.211052230304481 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3578007894497858 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21584109042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 8.911 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json new file mode 100644 index 000000000..465479cff --- /dev/null +++ b/data/hfopenllm_v2/alibaba/kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/1762652580.310462", + "retrieved_timestamp": "1762652580.310463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25836336476105626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3892856967853256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30966767371601206 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40869791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24517952127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 15.231 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json new file mode 100644 index 000000000..6212f6dc0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_QwenLawLo/1762652580.322983", + "retrieved_timestamp": "1762652580.322984", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_212_QwenLawLo", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_212_QwenLawLo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4566250880995758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574113357405873 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3602719033232628 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43696874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44888630319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json new file mode 100644 index 000000000..f5d94f721 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_Qwencore/1762652580.3232372", + "retrieved_timestamp": "1762652580.323238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_212_Qwencore", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_212_Qwencore" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4384400058511416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.556868234536878 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34894259818731116 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.448969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json new file mode 100644 index 000000000..de42fa574 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_230_Xiaqwen/1762652580.3234491", + "retrieved_timestamp": "1762652580.32345", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_230_Xiaqwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_230_Xiaqwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4648931501748693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.557779565750489 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36631419939577037 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4480551861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json new file mode 100644 index 000000000..68a2b3761 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_375_QwenDyancabs/1762652580.323661", + "retrieved_timestamp": "1762652580.323662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_375_QwenDyancabs", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_375_QwenDyancabs" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4565752204151651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5571383122938682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44617708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4476396276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json new file mode 100644 index 000000000..2f2d1e8b7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_456_QwenKoen/1762652580.323869", + "retrieved_timestamp": "1762652580.323869", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_456_QwenKoen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_456_QwenKoen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45292823042859615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5552713612233481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json new file mode 100644 index 000000000..1b8d76394 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_KoenQwenDyan/1762652580.324076", + "retrieved_timestamp": "1762652580.3240771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_7B_KoenQwenDyan", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_7B_KoenQwenDyan" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5807224830117421 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5536566841353078 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37386706948640486 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43687499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44597739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json new file mode 100644 index 000000000..cb083261a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_Qwen2.5koen/1762652580.324276", + "retrieved_timestamp": "1762652580.324277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_7B_Qwen2.5koen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_7B_Qwen2.5koen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45999725173650363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5544031312134464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36555891238670696 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43690625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4484707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json new file mode 100644 index 000000000..1cb7e6f83 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyanKoenLo/1762652580.324512", + "retrieved_timestamp": "1762652580.324513", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_7B_QwenDyanKoenLo", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_7B_QwenDyanKoenLo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46631714960748594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5562461525503201 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640483383685801 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4464760638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json new file mode 100644 index 000000000..f21cabfb3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/lkoenig/BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyancabsLAW/1762652580.3247318", + "retrieved_timestamp": "1762652580.3247318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_7B_QwenDyancabsLAW", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_7B_QwenDyancabsLAW" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5549685944405289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5578836606885887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3678247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4461145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4471409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json b/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json new file mode 100644 index 000000000..4cc97048d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/macadeliccc/Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/macadeliccc_Samantha-Qwen-2-7B/1762652580.3290062", + "retrieved_timestamp": "1762652580.3290062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "macadeliccc/Samantha-Qwen-2-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "macadeliccc/Samantha-Qwen-2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4377152621710395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5082341412476951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4799479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779089095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json new file mode 100644 index 000000000..a2f51c6d6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/1762652580.333172", + "retrieved_timestamp": "1762652580.333172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7444868504457063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.555919540267728 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4073333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429936835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json new file mode 100644 index 000000000..187ceb497 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/1762652580.333376", + "retrieved_timestamp": "1762652580.3333771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.704320092909037 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516165586639877 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47583081570996977 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43105208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44398271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json new file mode 100644 index 000000000..5214db02a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST/1762652580.332918", + "retrieved_timestamp": "1762652580.3329191", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7488330961847898 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5458495423775734 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244712990936556 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3913645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41630651595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json new file mode 100644 index 000000000..81c9ade0e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-Preview/1762652580.333591", + "retrieved_timestamp": "1762652580.3335922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7962439660101863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431064770878757 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4298125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43641954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json new file mode 100644 index 000000000..b1be1b278 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/1762652580.333802", + "retrieved_timestamp": "1762652580.333802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7727884236049238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5295123017150106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368580060422961 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44327083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4134807180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json new file mode 100644 index 000000000..39aedf392 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/1762652580.334015", + "retrieved_timestamp": "1762652580.334016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889499860370484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383575636307666 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4179375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4227061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json new file mode 100644 index 000000000..937c6a72e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/1762652580.334213", + "retrieved_timestamp": "1762652580.334214", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7865020368178655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.540250407222091 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44033232628398794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197140957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json new file mode 100644 index 000000000..3a2036c67 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1762652580.334473", + "retrieved_timestamp": "1762652580.334474", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7856276900845313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5326893189699237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42463541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43450797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json new file mode 100644 index 000000000..e687f5eab --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/1762652580.334734", + "retrieved_timestamp": "1762652580.3347352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7834545672149895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.54229983590397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4231770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4195478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json new file mode 100644 index 000000000..29d967667 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Qwen2.5-7B-Preview/1762652580.334959", + "retrieved_timestamp": "1762652580.334959", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Qwen2.5-7B-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Qwen2.5-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7679423928509688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359781834039953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441087613293053 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42578125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json new file mode 100644 index 000000000..4d7920fac --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/1762652580.335416", + "retrieved_timestamp": "1762652580.335417", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5757013612769672 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347734083768815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18957703927492447 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4059375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38314494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json new file mode 100644 index 000000000..de5d1157f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/marcuscedricridia/Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview/1762652580.335188", + "retrieved_timestamp": "1762652580.335188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5838696879834395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.537136379549371 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40463541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37982047872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json b/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json new file mode 100644 index 000000000..9bbede6a5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/maywell/Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maywell_Qwen2-7B-Multilingual-RP/1762652580.342533", + "retrieved_timestamp": "1762652580.3425338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maywell/Qwen2-7B-Multilingual-RP", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "maywell/Qwen2-7B-Multilingual-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4347176602525743 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5062058680861069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3858876329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json b/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json new file mode 100644 index 000000000..1e8c944c0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/mergekit-community/SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_SuperQwen-2.5-1.5B/1762652580.346312", + "retrieved_timestamp": "1762652580.346313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/SuperQwen-2.5-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "mergekit-community/SuperQwen-2.5-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336409615376091 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2906897601443365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3355208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10746343085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json b/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json new file mode 100644 index 000000000..e854fd701 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/mhl1/Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mhl1_Qwen2.5-0.5B-cinstruct-stage1/1762652580.3535528", + "retrieved_timestamp": "1762652580.353554", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14817905379947427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32557832478283544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35003125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11394614361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json b/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json new file mode 100644 index 000000000..a5fdfff15 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/migtissera/Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5.2-Qwen2-72B/1762652580.359263", + "retrieved_timestamp": "1762652580.359264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Tess-v2.5.2-Qwen2-72B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "migtissera/Tess-v2.5.2-Qwen2-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44943084349525925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6646791891060648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2938066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41883333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5561003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json b/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json new file mode 100644 index 000000000..ec0299a78 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/minghaowu/Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/1762652580.360414", + "retrieved_timestamp": "1762652580.360415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27779735546128714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33746396801266015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17918882978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.837 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json b/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json new file mode 100644 index 000000000..ebd0d0ea1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/1762652580.371459", + "retrieved_timestamp": "1762652580.3714602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731512387132807 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36983762765044165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40088541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23262965425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json new file mode 100644 index 000000000..e03df7d08 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-1.5B/1762652580.377223", + "retrieved_timestamp": "1762652580.377223", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Dumpling-Qwen2.5-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/Dumpling-Qwen2.5-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3698963195432563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4159743091354106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2771775265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json new file mode 100644 index 000000000..d6d272fcf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-14B/1762652580.3774788", + "retrieved_timestamp": "1762652580.37748", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Dumpling-Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/Dumpling-Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6064010159709571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6450644262798378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30966767371601206 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43539583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5170378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json new file mode 100644 index 000000000..82481c2e5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/1762652580.3776908", + "retrieved_timestamp": "1762652580.377692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4860004787297703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214228032573378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.236404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4229895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39586103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json new file mode 100644 index 000000000..2fc0fe24c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/1762652580.377894", + "retrieved_timestamp": "1762652580.377894", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417906709752346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5300548108450988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41215093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json new file mode 100644 index 000000000..979ea7c06 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/1762652580.378096", + "retrieved_timestamp": "1762652580.3780968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41148707651254224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39965589836197535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35018750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27119348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json new file mode 100644 index 000000000..b0473b646 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/1762652580.378304", + "retrieved_timestamp": "1762652580.378304", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.783554302583811 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6372016353633118 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4406666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211103723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json b/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json new file mode 100644 index 000000000..e7fb6674d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nbeerbower/Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/1762652580.38376", + "retrieved_timestamp": "1762652580.38376", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8090832324897937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6381735755183319 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415407854984894 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4100625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49210438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json b/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json new file mode 100644 index 000000000..fda4d8a3f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/1762652580.391702", + "retrieved_timestamp": "1762652580.3917031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11500596195871399 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28767781029884354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10895944148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json new file mode 100644 index 000000000..9b126a916 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/1762652580.400365", + "retrieved_timestamp": "1762652580.400365", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5878413720040603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5236664966992856 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3376132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39257291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390375664893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json new file mode 100644 index 000000000..a7485b3da --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-MFANN-slerp/1762652580.4005811", + "retrieved_timestamp": "1762652580.4005818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-7b-MFANN-slerp", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-7b-MFANN-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6532123654126606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088729928004616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3416722074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json new file mode 100644 index 000000000..05ceac128 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/1762652580.4007921", + "retrieved_timestamp": "1762652580.400793", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15644711587476784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2920111436321769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3791770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11003989361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json new file mode 100644 index 000000000..d7c09d8dd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/1762652580.4012349", + "retrieved_timestamp": "1762652580.401236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6486411610083467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065573474607916 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990936555891239 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41520833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3431682180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json new file mode 100644 index 000000000..1bf971316 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/1762652580.4010181", + "retrieved_timestamp": "1762652580.4010189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5742274941599401 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5071448530886461 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40584375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156582446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json new file mode 100644 index 000000000..041b17669 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/1762652580.401459", + "retrieved_timestamp": "1762652580.40146", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2675556412540947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37890218644722085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23238255033557048 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35279166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16771941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json new file mode 100644 index 000000000..86c2feaea --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-MFANN-7b/1762652580.401672", + "retrieved_timestamp": "1762652580.401673", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Qwen2.5-MFANN-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/Qwen2.5-MFANN-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6097233119234742 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5054347004252888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27870090634441086 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4020625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32330452127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json new file mode 100644 index 000000000..65431ed08 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/1762652580.40188", + "retrieved_timestamp": "1762652580.40188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6606060807546199 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5111030308243185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34383311170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json new file mode 100644 index 000000000..1fe39bbda --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/1762652580.402082", + "retrieved_timestamp": "1762652580.4020832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6554852236510238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074761993537673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41263541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34483045212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json new file mode 100644 index 000000000..b67c55212 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/netcat420/qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-v1.1/1762652580.402283", + "retrieved_timestamp": "1762652580.4022841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/qwen2.5-MFANN-7b-v1.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "netcat420/qwen2.5-MFANN-7b-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6088489651901399 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49666375554657477 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3248005319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json new file mode 100644 index 000000000..d727387b9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.3-Qwen2.5-7B/1762652580.4035761", + "retrieved_timestamp": "1762652580.403577", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-v0.3-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "newsbang/Homer-v0.3-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5154013572875525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480594290467807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30891238670694865 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47436458333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445561835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json new file mode 100644 index 000000000..a61aa977e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.4-Qwen2.5-7B/1762652580.403887", + "retrieved_timestamp": "1762652580.4038882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-v0.4-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "newsbang/Homer-v0.4-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.799940823681166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5533099174800821 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27794561933534745 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4310833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4362533244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json new file mode 100644 index 000000000..574b390dc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/newsbang/Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.5-Qwen2.5-7B/1762652580.404095", + "retrieved_timestamp": "1762652580.404096", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-v0.5-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "newsbang/Homer-v0.5-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7880756393037142 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5540181073562815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723564954682779 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41930208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369182180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json new file mode 100644 index 000000000..b5e681cdd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-72B/1762652580.404309", + "retrieved_timestamp": "1762652580.40431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-v1.0-Qwen2.5-72B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "newsbang/Homer-v1.0-Qwen2.5-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7627716680629618 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7309799550978827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4161073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4677291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6145279255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json new file mode 100644 index 000000000..32a45823d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/newsbang/Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-7B/1762652580.404567", + "retrieved_timestamp": "1762652580.404568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-v1.0-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "newsbang/Homer-v1.0-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6392737935344885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5655254177370223 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42782291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45345744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json b/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json new file mode 100644 index 000000000..ed71c4d43 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nguyentd/FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nguyentd_FinancialAdvice-Qwen2.5-7B/1762652580.404779", + "retrieved_timestamp": "1762652580.4047801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nguyentd/FinancialAdvice-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nguyentd/FinancialAdvice-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.449605934476079 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4730934153895792 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40248958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375249335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json new file mode 100644 index 000000000..a9a9c6c96 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.407119", + "retrieved_timestamp": "1762652580.40712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nisten/franqwenstein-35b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nisten/franqwenstein-35b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39135383005979685 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6591132598701116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.304380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5610871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 34.714 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json new file mode 100644 index 000000000..7fe6c03cc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nisten/franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.406877", + "retrieved_timestamp": "1762652580.406878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nisten/franqwenstein-35b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nisten/franqwenstein-35b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37986320740080765 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6646579178049268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49402083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5730551861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 34.714 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json b/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json new file mode 100644 index 000000000..96a164cee --- /dev/null +++ b/data/hfopenllm_v2/alibaba/nisten/tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nisten_tqwendo-36b/1762652580.40731", + "retrieved_timestamp": "1762652580.4073112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nisten/tqwendo-36b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "nisten/tqwendo-36b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6777672132164878 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6431830832659088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41540785498489424 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44295833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 35.69 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json new file mode 100644 index 000000000..e2a00f3ad --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp1/1762652580.463281", + "retrieved_timestamp": "1762652580.463281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Galactic-Qwen-14B-Exp1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Galactic-Qwen-14B-Exp1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5832202999153357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6582262489447345 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40181268882175225 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4780520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json new file mode 100644 index 000000000..35bc920ac --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp2/1762652580.463546", + "retrieved_timestamp": "1762652580.463547", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Galactic-Qwen-14B-Exp2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Galactic-Qwen-14B-Exp2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6620300801872365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7203002699449659 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39932885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5353854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5690658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json new file mode 100644 index 000000000..c3a6c36c6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Qwen-25B-R999/1762652580.466958", + "retrieved_timestamp": "1762652580.466959", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Magellanic-Qwen-25B-R999", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Magellanic-Qwen-25B-R999" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18727199386516663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26075689808294905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299867021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 24.962 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json new file mode 100644 index 000000000..477c87b6a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen-7B-Distill-Reasoner/1762652580.474049", + "retrieved_timestamp": "1762652580.47405", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Qwen-7B-Distill-Reasoner", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Qwen-7B-Distill-Reasoner" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3395712265677292 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409329229697952 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950151057401813 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36596874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2818317819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json new file mode 100644 index 000000000..ae9a366cf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/1762652580.474647", + "retrieved_timestamp": "1762652580.474647", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4192808415005519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5934849375153814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4606041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48994348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json new file mode 100644 index 000000000..26092916b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/1762652580.474907", + "retrieved_timestamp": "1762652580.4749079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18612282078219125 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3125554204779005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3416875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009640957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json b/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json new file mode 100644 index 000000000..757becf25 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/qingy2024/Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-4B/1762652580.486805", + "retrieved_timestamp": "1762652580.486807", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.5-4B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.5-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21584839337402537 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4269378314466817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46103125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2524933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.168 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json new file mode 100644 index 000000000..ae24aa29d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-14b/1762652580.496415", + "retrieved_timestamp": "1762652580.496416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7047445223119102 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6165135323666455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3300604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3939494680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json new file mode 100644 index 000000000..4a2c44996 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-7b/1762652580.4966788", + "retrieved_timestamp": "1762652580.49668", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6210388436016436 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077090028113894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3979375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json new file mode 100644 index 000000000..6800e6ece --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/1762652580.4969", + "retrieved_timestamp": "1762652580.4969", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28466690603155187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32936751831436256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32358333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18658577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json new file mode 100644 index 000000000..0a80ca518 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/1762652580.497122", + "retrieved_timestamp": "1762652580.497123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3402461025634206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4256703145864387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2922207446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json new file mode 100644 index 000000000..5e4103faf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-14b/1762652580.4975061", + "retrieved_timestamp": "1762652580.497507", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5840447789642593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6481086261669653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4554380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4717291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375664893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json new file mode 100644 index 000000000..42247a717 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-32b/1762652580.497819", + "retrieved_timestamp": "1762652580.49782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6826631116548536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7045537070859799 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954682779456193 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39681208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5034166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5915890957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json new file mode 100644 index 000000000..d0af513b8 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-3b/1762652580.498058", + "retrieved_timestamp": "1762652580.498058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5342358276040905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4808896246368473 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4041666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json new file mode 100644 index 000000000..3b4ca47cf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1762652580.498325", + "retrieved_timestamp": "1762652580.498326", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.715535889218385 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7229589065788488 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5422960725075529 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39848993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.593500664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json new file mode 100644 index 000000000..e8c91f574 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-7b/1762652580.498573", + "retrieved_timestamp": "1762652580.498574", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6237117514860571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5543885046903589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42909375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json new file mode 100644 index 000000000..c4848a828 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.498805", + "retrieved_timestamp": "1762652580.498805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2595125378440316 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884043024656656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667675 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39911458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27194148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json new file mode 100644 index 000000000..15a8604db --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.499037", + "retrieved_timestamp": "1762652580.499037", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2566401592219755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39000839740376536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39911458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27410239361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json new file mode 100644 index 000000000..31b859d81 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/rombodawg/Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Qwen-14b/1762652580.499588", + "retrieved_timestamp": "1762652580.4995892", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8431550508207113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6442096596344892 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4220625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49609375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json b/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json new file mode 100644 index 000000000..cc3d9ae47 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/securin/Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/securin_Securin-LLM-V2.5-Qwen-1.5B/1762652580.510926", + "retrieved_timestamp": "1762652580.5109272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "securin/Securin-LLM-V2.5-Qwen-1.5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "securin/Securin-LLM-V2.5-Qwen-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1492030035860406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158416288115425 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16148603723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json b/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json new file mode 100644 index 000000000..48af2c9a3 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sethuiyer/Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_Qwen2.5-7B-Anvita/1762652580.514066", + "retrieved_timestamp": "1762652580.514067", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/Qwen2.5-7B-Anvita", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sethuiyer/Qwen2.5-7B-Anvita" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6480416406246536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465860266784314 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43365625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4165558510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json b/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json new file mode 100644 index 000000000..54b109ff5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/someon98/qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/someon98_qwen-CoMa-0.5b/1762652580.518077", + "retrieved_timestamp": "1762652580.5180779", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "someon98/qwen-CoMa-0.5b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "someon98/qwen-CoMa-0.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22766371006706648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29533439538939815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40457291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10987367021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json new file mode 100644 index 000000000..9b119388e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/1762652580.5196202", + "retrieved_timestamp": "1762652580.5196211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4906470387460826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6535142192324058 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4846875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406416223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json new file mode 100644 index 000000000..585967e2f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-14B-ProseStock-v4/1762652580.522184", + "retrieved_timestamp": "1762652580.5221848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen-14B-ProseStock-v4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen-14B-ProseStock-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4942186731206532 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6498268976192769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640483383685801 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49383333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json new file mode 100644 index 000000000..ce8eec1e4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/1762652580.5224378", + "retrieved_timestamp": "1762652580.522439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4813295389566351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6569729950776678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4793541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5377327127659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json new file mode 100644 index 000000000..d8056cc58 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/1762652580.52286", + "retrieved_timestamp": "1762652580.522861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4505301488938239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6550336897572636 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48189583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5379820478723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json new file mode 100644 index 000000000..f5ee33869 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/1762652580.523307", + "retrieved_timestamp": "1762652580.523308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6412973133507981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5520788965536542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5319166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4588597074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json new file mode 100644 index 000000000..5eed745fd --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/1762652580.523516", + "retrieved_timestamp": "1762652580.523516", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6872343160591674 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6358769213927613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3995468277945619 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48071875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5275099734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json new file mode 100644 index 000000000..7140ca4c6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/1762652580.5237172", + "retrieved_timestamp": "1762652580.5237179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7161852772864887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6420915332649074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244712990936556 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47811458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5315824468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json new file mode 100644 index 000000000..532b1aec0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/1762652580.52309", + "retrieved_timestamp": "1762652580.52309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7256523801291683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.641460062329604 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4806875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json new file mode 100644 index 000000000..9fafa7140 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso/1762652580.522644", + "retrieved_timestamp": "1762652580.522645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45742407922091166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6446348390056346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4858645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5329122340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json new file mode 100644 index 000000000..1e1a42d35 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/1762652580.524123", + "retrieved_timestamp": "1762652580.524123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347101246913745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5599089581177875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2892749244712991 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45017708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4525432180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json new file mode 100644 index 000000000..4a291e1e6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/1762652580.5243258", + "retrieved_timestamp": "1762652580.5243268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49172085621705963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5498169530870823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2620845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4434166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4306848404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json new file mode 100644 index 000000000..79eadfa22 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/1762652580.5239239", + "retrieved_timestamp": "1762652580.523925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.748183708116686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5523808037550308 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40162499999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43001994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json new file mode 100644 index 000000000..54c8eedfc --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentessential-14B-v1/1762652580.524672", + "retrieved_timestamp": "1762652580.524674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentessential-14B-v1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentessential-14B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6279083941719084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6545165968552056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070996978851964 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4872916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5381482712765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json new file mode 100644 index 000000000..284844c2b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v013/1762652580.5250719", + "retrieved_timestamp": "1762652580.525074", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v013", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v013" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6711226213114536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6086634082040333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37084592145015105 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5154166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49908577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json new file mode 100644 index 000000000..a78e2e058 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v1/1762652580.5253482", + "retrieved_timestamp": "1762652580.5253491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5031616111916382 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6572572845221036 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4780520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5409740691489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json new file mode 100644 index 000000000..2f39447b7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v2/1762652580.525585", + "retrieved_timestamp": "1762652580.525586", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378329499062487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6555355668062347 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47141666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408909574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json new file mode 100644 index 000000000..18ac34a81 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v3/1762652580.525815", + "retrieved_timestamp": "1762652580.525816", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6157683834448153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6538645567116264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48598958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413065159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json new file mode 100644 index 000000000..774ae77f2 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v5/1762652580.5261161", + "retrieved_timestamp": "1762652580.526117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.628557782240012 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.654985060704008 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441087613293053 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4873854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418051861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json new file mode 100644 index 000000000..77fde0e80 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6-Prose/1762652580.52656", + "retrieved_timestamp": "1762652580.526561", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v6-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v6-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5642860942299764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6545112522796068 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009063444108764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4912604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392287234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json new file mode 100644 index 000000000..1ac01b8c1 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6/1762652580.526352", + "retrieved_timestamp": "1762652580.526353", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6304062110755019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6544517420216159 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48995833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json new file mode 100644 index 000000000..569ccae13 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v7/1762652580.526774", + "retrieved_timestamp": "1762652580.526774", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v7", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6109223526908603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6551430222697051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48198958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5409740691489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json new file mode 100644 index 000000000..c977a3324 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v8/1762652580.526987", + "retrieved_timestamp": "1762652580.526987", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411552458587658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6534258495008117 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39123867069486407 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48732291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412234042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json new file mode 100644 index 000000000..26d3b46e0 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v9/1762652580.5271978", + "retrieved_timestamp": "1762652580.527199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwentinuum-14B-v9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwentinuum-14B-v9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5107304175144174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6580257842849174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34818731117824775 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47811458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json new file mode 100644 index 000000000..7521b02e6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-qv256/1762652580.52741", + "retrieved_timestamp": "1762652580.527411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-qv256", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-qv256" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7006232352380573 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6312084721949004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38972809667673713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49259375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5177859042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json new file mode 100644 index 000000000..e85b3e090 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/1762652580.5276191", + "retrieved_timestamp": "1762652580.52762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6859854076073706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6249338707540049 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json new file mode 100644 index 000000000..4fac89f06 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v10/1762652580.5278451", + "retrieved_timestamp": "1762652580.5278451", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v10", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6756938257157675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6316425399409628 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4788519637462236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49913541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json new file mode 100644 index 000000000..2961ff5b9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v11/1762652580.528142", + "retrieved_timestamp": "1762652580.5281432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v11", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7192327468893647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6367548394062034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4645015105740181 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4754479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327460106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json new file mode 100644 index 000000000..e0ff01d64 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/1762652580.52859", + "retrieved_timestamp": "1762652580.5285912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6173419859306639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6506726813719318 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43051359516616317 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5150729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369015957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json new file mode 100644 index 000000000..13ae2aea4 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose/1762652580.52837", + "retrieved_timestamp": "1762652580.5283709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v12-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v12-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412051135431766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6504247508173936 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49913541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5381482712765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json new file mode 100644 index 000000000..e2bf3265f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/1762652580.528805", + "retrieved_timestamp": "1762652580.528806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.717808747456748 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6405077084802886 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859516616314199 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49265625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.534906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json new file mode 100644 index 000000000..375113331 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/1762652580.529013", + "retrieved_timestamp": "1762652580.529014", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5032114788760489 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6550130348108012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3632930513595166 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4912916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.539311835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json new file mode 100644 index 000000000..933c88b5d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v2-Prose/1762652580.529223", + "retrieved_timestamp": "1762652580.529224", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v2-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v2-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47048830436574957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6518830473518972 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3557401812688822 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49259375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371509308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json new file mode 100644 index 000000000..de26eea96 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Prose/1762652580.5297742", + "retrieved_timestamp": "1762652580.5297751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v3-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v3-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49177072390147036 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6512913170949324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49389583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369847074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json new file mode 100644 index 000000000..205eb17aa --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530208", + "retrieved_timestamp": "1762652580.530208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v3-Reason" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278161943642867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6557437566824342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119335347432024 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47541666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396442819148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json new file mode 100644 index 000000000..89fb71c5b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530001", + "retrieved_timestamp": "1762652580.530001", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v3-Reason" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366837768232734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6561283957466177 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47402083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394780585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json new file mode 100644 index 000000000..8f91d2249 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3/1762652580.529505", + "retrieved_timestamp": "1762652580.529512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504410519643435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.654823836148701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693353474320242 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48859375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json new file mode 100644 index 000000000..4ef51a35b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/1762652580.530609", + "retrieved_timestamp": "1762652580.5306098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48110458029140457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6530441861690175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48989583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5387300531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json new file mode 100644 index 000000000..93e2861fb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose/1762652580.530398", + "retrieved_timestamp": "1762652580.530399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v6-Prose", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v6-Prose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5990073006289978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6543750230807198 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48865625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370678191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json new file mode 100644 index 000000000..777ff7dce --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v8/1762652580.530813", + "retrieved_timestamp": "1762652580.530813", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v8", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5913387589373973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6522455361956444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40483383685800606 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47678125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543467420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json new file mode 100644 index 000000000..6d6ac2297 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sometimesanotion/Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v9/1762652580.531015", + "retrieved_timestamp": "1762652580.5310159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Qwenvergence-14B-v9", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sometimesanotion/Qwenvergence-14B-v9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6598070896332842 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6165623747365094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110538563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json b/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json new file mode 100644 index 000000000..257279a6c --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_Qwenftmodel/1762652580.5454028", + "retrieved_timestamp": "1762652580.545404", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/Qwenftmodel", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/Qwenftmodel" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17290899258412123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38226970256668574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36171875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23387632978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json b/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json new file mode 100644 index 000000000..d9b4c88e7 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_Qwenmplus/1762652580.5456882", + "retrieved_timestamp": "1762652580.545689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/Qwenmplus", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/Qwenmplus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20403307668098425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3675511408391697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38283333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19921875 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json b/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json new file mode 100644 index 000000000..2327f4066 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_Qwensci/1762652580.545888", + "retrieved_timestamp": "1762652580.5458891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/Qwensci", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/Qwensci" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17398281005509825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3281870591856875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3608854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12599734042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json new file mode 100644 index 000000000..7f96fc0af --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen/1762652580.546088", + "retrieved_timestamp": "1762652580.546089", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18085236062536292 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3388245916050106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43523958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16165226063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json new file mode 100644 index 000000000..f2b27274d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen2/1762652580.546288", + "retrieved_timestamp": "1762652580.546289", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15329991090307052 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30663248168563745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44305208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1149434840425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json new file mode 100644 index 000000000..16e47c18d --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen3/1762652580.546491", + "retrieved_timestamp": "1762652580.546491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen3", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1942911474886634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2950842029929075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3796145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json new file mode 100644 index 000000000..5745c3da9 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen4/1762652580.546697", + "retrieved_timestamp": "1762652580.546698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen4", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14485675784695717 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3199395559502713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15093085106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json new file mode 100644 index 000000000..91f789e2b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen5/1762652580.546902", + "retrieved_timestamp": "1762652580.5469031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen5", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1521507378200951 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29130964476405813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4019375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json b/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json new file mode 100644 index 000000000..56fe07f24 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/sumink/bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_bbhqwen6/1762652580.547101", + "retrieved_timestamp": "1762652580.547102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/bbhqwen6", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "sumink/bbhqwen6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18929551368147626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2782242419852629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11527593085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json b/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json new file mode 100644 index 000000000..80f877e72 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/synergetic/FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/synergetic_FrankenQwen2.5-14B/1762652580.5505831", + "retrieved_timestamp": "1762652580.550584", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "synergetic/FrankenQwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "synergetic/FrankenQwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1869472998311148 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6047748435655343 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 16.972 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json new file mode 100644 index 000000000..65453582a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-14b-tensopolis-v1/1762652580.556658", + "retrieved_timestamp": "1762652580.556659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/qwen2.5-14b-tensopolis-v1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "tensopolis/qwen2.5-14b-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7990166092634211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6363595324538928 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49110704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json new file mode 100644 index 000000000..44721a160 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-3b-or1-tensopolis/1762652580.556941", + "retrieved_timestamp": "1762652580.556942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/qwen2.5-3b-or1-tensopolis", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "tensopolis/qwen2.5-3b-or1-tensopolis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35400958346077294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44214988544006467 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37492708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json new file mode 100644 index 000000000..0c2625ca5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v1/1762652580.5571609", + "retrieved_timestamp": "1762652580.557162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/qwen2.5-7b-tensopolis-v1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "tensopolis/qwen2.5-7b-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7660939640154789 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378740884658956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4561933534743202 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42686170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json new file mode 100644 index 000000000..aa6fb06cf --- /dev/null +++ b/data/hfopenllm_v2/alibaba/tensopolis/qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v2/1762652580.5574138", + "retrieved_timestamp": "1762652580.5574138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/qwen2.5-7b-tensopolis-v2", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "tensopolis/qwen2.5-7b-tensopolis-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.752105524452896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414622323974015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42463541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42428523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json b/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json new file mode 100644 index 000000000..36a763cdb --- /dev/null +++ b/data/hfopenllm_v2/alibaba/theprint/ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Qwen-2.5-14B/1762652580.563489", + "retrieved_timestamp": "1762652580.5634902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-Qwen-2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "theprint/ReWiz-Qwen-2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27854647889821227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6179492756426455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45389583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.743 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json new file mode 100644 index 000000000..0f910635f --- /dev/null +++ b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-1_5B/1762652580.565519", + "retrieved_timestamp": "1762652580.565519", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thomas-yanxin/XinYuan-Qwen2-1_5B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "thomas-yanxin/XinYuan-Qwen2-1_5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2985556102253133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3635491993150823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36339583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23570478723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json new file mode 100644 index 000000000..d52ef15b5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B-0917/1762652580.56599", + "retrieved_timestamp": "1762652580.565991", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thomas-yanxin/XinYuan-Qwen2-7B-0917", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37191983935956596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169215573786009 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4401041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4245345744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json new file mode 100644 index 000000000..6cc03251a --- /dev/null +++ b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B/1762652580.565779", + "retrieved_timestamp": "1762652580.56578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thomas-yanxin/XinYuan-Qwen2-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "thomas-yanxin/XinYuan-Qwen2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44376033369238066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4936629157238895 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40581249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3924534574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json new file mode 100644 index 000000000..7e83a64a6 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/thomas-yanxin/XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/1762652580.5662022", + "retrieved_timestamp": "1762652580.5662029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35770644113175265 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5184106116987492 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3675520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821476063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json new file mode 100644 index 000000000..904f2bd43 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/1762652580.584905", + "retrieved_timestamp": "1762652580.584906", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8045120280854798 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.639849930188539 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5264350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4930186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json new file mode 100644 index 000000000..36d14a54e --- /dev/null +++ b/data/hfopenllm_v2/alibaba/v000000/Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-Lumen-14B/1762652580.585356", + "retrieved_timestamp": "1762652580.585357", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/Qwen2.5-Lumen-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "v000000/Qwen2.5-Lumen-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8063604569209697 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6390809511149668 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49027593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json b/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json new file mode 100644 index 000000000..bdc68cf09 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/vonjack/Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_Qwen2.5-Coder-0.5B-Merged/1762652580.5902011", + "retrieved_timestamp": "1762652580.590202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/Qwen2.5-Coder-0.5B-Merged", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "vonjack/Qwen2.5-Coder-0.5B-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30997087727230416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3076017752057237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33034375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12017952127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json b/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json new file mode 100644 index 000000000..e0a7281b5 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/wave-on-discord/qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wave-on-discord_qwent-7b/1762652580.592784", + "retrieved_timestamp": "1762652580.592785", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wave-on-discord/qwent-7b", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "wave-on-discord/qwent-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20148539209297997 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228103286118343 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38165625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16032247340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json b/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json new file mode 100644 index 000000000..7f7e56a32 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/win10/EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_EVA-Norns-Qwen2.5-v0.1/1762652580.594388", + "retrieved_timestamp": "1762652580.594388", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/EVA-Norns-Qwen2.5-v0.1", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "win10/EVA-Norns-Qwen2.5-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6219630580193884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507240838017382 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26132930513595165 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40451041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3425033244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json new file mode 100644 index 000000000..f3cfda531 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-12B/1762652580.594881", + "retrieved_timestamp": "1762652580.594882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/Norns-Qwen2.5-12B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "win10/Norns-Qwen2.5-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48969733640074997 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46189201103923744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3554895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2660405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 12.277 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json new file mode 100644 index 000000000..3c29c9340 --- /dev/null +++ b/data/hfopenllm_v2/alibaba/win10/Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-7B/1762652580.5950878", + "retrieved_timestamp": "1762652580.595089", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/Norns-Qwen2.5-7B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "win10/Norns-Qwen2.5-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6122211288270678 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5072887832228614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2628398791540785 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40847916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34133976063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json b/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json new file mode 100644 index 000000000..f5e00a60b --- /dev/null +++ b/data/hfopenllm_v2/alibaba/x0000001/Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/1762652580.596637", + "retrieved_timestamp": "1762652580.596638", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", + "developer": "alibaba", + "inference_platform": "unknown", + "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4436107306391486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45690468424066283 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27794561933534745 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47396875000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4379155585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json new file mode 100644 index 000000000..570d57724 --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-DPO/1762652579.9821", + "retrieved_timestamp": "1762652579.982101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-70B-DPO", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-70B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8281925291559729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6146203626958501 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4922604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4632646276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json new file mode 100644 index 000000000..7905f87c5 --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-SFT/1762652579.982346", + "retrieved_timestamp": "1762652579.982346", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-70B-SFT", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-70B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8050616807847621 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5951437800580934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33157099697885195 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5026145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46243351063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json new file mode 100644 index 000000000..6d43708be --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-DPO/1762652579.9829278", + "retrieved_timestamp": "1762652579.982929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-8B-DPO", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-8B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8029384255996312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4079428557044153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.236404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41613541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2898105053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json new file mode 100644 index 000000000..736b53a5e --- /dev/null +++ b/data/hfopenllm_v2/allenai/Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-SFT/1762652579.983397", + "retrieved_timestamp": "1762652579.983398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-8B-SFT", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-8B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7403400754442657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871863270501647 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28116688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json b/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json new file mode 100644 index 000000000..167c2b63d --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-1.7-7B-hf/1762652579.9836009", + "retrieved_timestamp": "1762652579.9836018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMo-1.7-7B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMo-1.7-7B-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1568970332052288 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3013695911207614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34748958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json b/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json new file mode 100644 index 000000000..b986f0951 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-1B-hf/1762652579.983823", + "retrieved_timestamp": "1762652579.983823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMo-1B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMo-1B-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21819660722438686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30519468988429327 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11735372340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 1.177 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json b/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json new file mode 100644 index 000000000..ccfccbab9 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-2-1124-7B-Instruct/1762652579.9840362", + "retrieved_timestamp": "1762652579.9840372", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMo-2-1124-7B-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMo-2-1124-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7244034716773715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40223602474417786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1487915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35083333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2672041223404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Olmo2ForCausalLM", + "params_billions": 7.299 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json b/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json new file mode 100644 index 000000000..a9e86c4b2 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-Instruct-hf/1762652579.98445", + "retrieved_timestamp": "1762652579.984452", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMo-7B-Instruct-hf", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMo-7B-Instruct-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3472652561869174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3706469866662716 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37647916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17852393617021275 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json b/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json new file mode 100644 index 000000000..3e9e0c916 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-hf/1762652579.984753", + "retrieved_timestamp": "1762652579.984753", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMo-7B-hf", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMo-7B-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719273749207658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32791316587362274 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11727061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoForCausalLM", + "params_billions": 6.888 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json new file mode 100644 index 000000000..ac6d05767 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0125-Instruct/1762652579.984983", + "retrieved_timestamp": "1762652579.984983", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMoE-1B-7B-0125-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMoE-1B-7B-0125-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6757436934001781 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38245348916008676 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3635833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19148936170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json new file mode 100644 index 000000000..5545b69e0 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924-Instruct/1762652579.98542", + "retrieved_timestamp": "1762652579.98542", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMoE-1B-7B-0924-Instruct", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMoE-1B-7B-0924-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4667415790103592 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3901610626816106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3848229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json new file mode 100644 index 000000000..c6ede33c3 --- /dev/null +++ b/data/hfopenllm_v2/allenai/OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924/1762652579.985209", + "retrieved_timestamp": "1762652579.9852102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/OLMoE-1B-7B-0924", + "developer": "allenai", + "inference_platform": "unknown", + "id": "allenai/OLMoE-1B-7B-0924" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21847143357402804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3393437931177341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34879166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1739527925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json b/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json new file mode 100644 index 000000000..75060bc0e --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Chocolatine-24B/1762652579.9856288", + "retrieved_timestamp": "1762652579.98563", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Chocolatine-24B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Chocolatine-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19581488229010136 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6191260063262436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43232291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4566156914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 24.184 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json new file mode 100644 index 000000000..adc973a8e --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp1-7B/1762652579.988248", + "retrieved_timestamp": "1762652579.988249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/HomerSlerp1-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/HomerSlerp1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46212050692163464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.551818027489446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719033232628399 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43585416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503823138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json new file mode 100644 index 000000000..2ff386ed6 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp2-7B/1762652579.988459", + "retrieved_timestamp": "1762652579.98846", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/HomerSlerp2-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/HomerSlerp2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44868172005833407 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5648943315947 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43557291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45146276595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json new file mode 100644 index 000000000..50e886a4f --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp3-7B/1762652579.988729", + "retrieved_timestamp": "1762652579.9887302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/HomerSlerp3-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/HomerSlerp3-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4362668829815999 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5598063466560873 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3021148036253776 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44617708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45345744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json b/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json new file mode 100644 index 000000000..fd6785d19 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp4-7B/1762652579.988936", + "retrieved_timestamp": "1762652579.988937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/HomerSlerp4-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/HomerSlerp4-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43741605606457534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5570767234678723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44084375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44722406914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json b/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json new file mode 100644 index 000000000..9e97b0bd2 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_LimyQstar-7B-slerp/1762652579.98914", + "retrieved_timestamp": "1762652579.989141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/LimyQstar-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/LimyQstar-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34911368502240725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5023559424245442 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4146458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3103390957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json b/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json new file mode 100644 index 000000000..d5a360d48 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Marco-01-slerp1-7B/1762652579.989768", + "retrieved_timestamp": "1762652579.98977", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Marco-01-slerp1-7B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Marco-01-slerp1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46811571075856506 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5540943469864194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4451875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44830452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json b/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json new file mode 100644 index 000000000..87b5f1c80 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Meme-7B-slerp/1762652579.9900281", + "retrieved_timestamp": "1762652579.990029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Meme-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Meme-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5163754393897082 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4660944195552204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.281000664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json b/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json new file mode 100644 index 000000000..e0c5b7973 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ministral-8B-slerp/1762652579.990243", + "retrieved_timestamp": "1762652579.9902442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ministral-8B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ministral-8B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19608970863974257 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4686018544963986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42853125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119182180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json b/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json new file mode 100644 index 000000000..66843533d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MixTAO-19B-pass/1762652579.991234", + "retrieved_timestamp": "1762652579.991235", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MixTAO-19B-pass", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MixTAO-19B-pass" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814368098866563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128248798224987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47827083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31050531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json b/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json new file mode 100644 index 000000000..c195b1ec8 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MixTaoTruthful-13B-slerp/1762652579.991453", + "retrieved_timestamp": "1762652579.991454", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MixTaoTruthful-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MixTaoTruthful-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41388515804731446 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207335343585151 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42924999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json b/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json new file mode 100644 index 000000000..7be0632d9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiCalm-7B-slerp/1762652579.991671", + "retrieved_timestamp": "1762652579.991672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiCalm-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiCalm-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926526061960044 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5121891599770304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43194791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3032746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json b/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json new file mode 100644 index 000000000..4b1e94011 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash-12B-slerp/1762652579.991891", + "retrieved_timestamp": "1762652579.9918919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39744876926554873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141827379810838 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44379166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json b/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json new file mode 100644 index 000000000..e05563b6b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash10-13B-slerp/1762652579.992115", + "retrieved_timestamp": "1762652579.992116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash10-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash10-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41628323958208663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5186335995744094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43179166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3116688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json b/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json new file mode 100644 index 000000000..4ede973ca --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash11-13B-slerp/1762652579.992343", + "retrieved_timestamp": "1762652579.9923441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash11-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash11-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4251009543566625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193864686484946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30851063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json b/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json new file mode 100644 index 000000000..2a5d0962b --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash2-12B-slerp/1762652579.992556", + "retrieved_timestamp": "1762652579.992556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash2-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash2-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42607503645881817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5133973498532299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3042719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json b/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json new file mode 100644 index 000000000..28ce8261d --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash5-12B-slerp/1762652579.992772", + "retrieved_timestamp": "1762652579.992772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash5-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash5-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41415998439695567 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5144534995858502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4202916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30277593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json b/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json new file mode 100644 index 000000000..064be03c1 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash6-12B-slerp/1762652579.992992", + "retrieved_timestamp": "1762652579.992993", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash6-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash6-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43004672047943904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195916915718951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4305833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30909242021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json b/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json new file mode 100644 index 000000000..f83395c61 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash7-12B-slerp/1762652579.993205", + "retrieved_timestamp": "1762652579.993206", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash7-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash7-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42127887338927383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5111135397195524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42794791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3029421542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json b/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json new file mode 100644 index 000000000..a4512a92e --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash8-13B-slerp/1762652579.9938078", + "retrieved_timestamp": "1762652579.99381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash8-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash8-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4320702402957486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5178483059643324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4423958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31258311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json b/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json new file mode 100644 index 000000000..73796c013 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash9-13B-slerp/1762652579.994061", + "retrieved_timestamp": "1762652579.994061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMash9-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMash9-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187810564856802 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193579939678727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4398229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json b/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json new file mode 100644 index 000000000..85a7faca9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMerge-7B-slerp/1762652579.994297", + "retrieved_timestamp": "1762652579.994299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiMerge-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiMerge-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3947758613811354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5140224933103638 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42797916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036901595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json b/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json new file mode 100644 index 000000000..4f3af69c4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Multimash3-12B-slerp/1762652579.994557", + "retrieved_timestamp": "1762652579.994557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Multimash3-12B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Multimash3-12B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44371046600796993 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5176624678276028 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json b/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json new file mode 100644 index 000000000..fc024ad33 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Multimerge-19B-pass/1762652579.9948218", + "retrieved_timestamp": "1762652579.994823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Multimerge-19B-pass", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Multimerge-19B-pass" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17730510600761534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2891778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3429583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11685505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json b/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json new file mode 100644 index 000000000..d12bd0236 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MultiverseEx26-7B-slerp/1762652579.995038", + "retrieved_timestamp": "1762652579.995039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MultiverseEx26-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/MultiverseEx26-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3938516469633905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5133591871690678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4293125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3035239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json b/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json new file mode 100644 index 000000000..d9511a3b9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_NeuralWestSeverus-7B-slerp/1762652579.995253", + "retrieved_timestamp": "1762652579.995254", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/NeuralWestSeverus-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/NeuralWestSeverus-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41356046401326263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244283854305991 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45287499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3137466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json b/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json new file mode 100644 index 000000000..3267ae275 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralcoven-7B-slerp/1762652579.995681", + "retrieved_timestamp": "1762652579.995682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Neuralcoven-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Neuralcoven-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3858584112377381 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.530287217712165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3293716755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json b/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json new file mode 100644 index 000000000..732b7e8c1 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralmultiverse-7B-slerp/1762652579.995954", + "retrieved_timestamp": "1762652579.995955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Neuralmultiverse-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Neuralmultiverse-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3769154731667531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5165722210470375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42804166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30418882978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json b/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json new file mode 100644 index 000000000..4b3cc7c69 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3della5-14B/1762652579.9961941", + "retrieved_timestamp": "1762652579.996195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3della5-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3della5-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47985567183960776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6331746353794991 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4386145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4787234042553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json b/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json new file mode 100644 index 000000000..422c3d87a --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge-14B/1762652579.996419", + "retrieved_timestamp": "1762652579.9964201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3merge-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3merge-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27012881376968667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.638087568868341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4334375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4611037234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json b/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json new file mode 100644 index 000000000..279c00686 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge2-14B/1762652579.996639", + "retrieved_timestamp": "1762652579.99664", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3merge2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3merge2-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17061064641817045 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606937444321621 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1722905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json b/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json new file mode 100644 index 000000000..40127acc2 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge3-14B/1762652579.99685", + "retrieved_timestamp": "1762652579.996851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3merge3-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3merge3-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1645157072124186 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3597431731140411 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40819791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16472739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.619 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json b/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json new file mode 100644 index 000000000..1b52cda46 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task1-14B/1762652579.997059", + "retrieved_timestamp": "1762652579.99706", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3task1-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3task1-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46946435457918323 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.63178060736657 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45077083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4734042553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json b/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json new file mode 100644 index 000000000..719f0a115 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task2-14B/1762652579.99728", + "retrieved_timestamp": "1762652579.997281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3task2-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3task2-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4713127834146731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6098412220695854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4535 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44597739361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json b/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json new file mode 100644 index 000000000..51e1df907 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task3-14B/1762652579.997498", + "retrieved_timestamp": "1762652579.997499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3task3-14B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3task3-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962421929369628 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6297915743094921 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44255208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47706117021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json b/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json new file mode 100644 index 000000000..405c47e04 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3unsloth-3B-slerp/1762652579.99772", + "retrieved_timestamp": "1762652579.99772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Ph3unsloth-3B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Ph3unsloth-3B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18944511673470835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5468077356147099 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45278124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3700964095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json b/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json new file mode 100644 index 000000000..75a207e17 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Quen2-65B/1762652579.9981499", + "retrieved_timestamp": "1762652579.9981499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Quen2-65B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Quen2-65B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17578137120617737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27565161872324456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32085416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11136968085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 63.923 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json b/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json new file mode 100644 index 000000000..54519ce30 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_RogerMerge-7B-slerp/1762652580.002474", + "retrieved_timestamp": "1762652580.002475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/RogerMerge-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/RogerMerge-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39330199426410817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5160176493085935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43197916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30302526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json b/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json new file mode 100644 index 000000000..3b14accda --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Strangecoven-7B-slerp/1762652580.002888", + "retrieved_timestamp": "1762652580.002889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Strangecoven-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Strangecoven-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37464261492839 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5368022290282338 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4198854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33643617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json b/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json new file mode 100644 index 000000000..439f5d6a4 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Weirdslerp2-25B/1762652580.00309", + "retrieved_timestamp": "1762652580.0030909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Weirdslerp2-25B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Weirdslerp2-25B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1754068094877148 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2873695911207614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 25.204 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json b/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json new file mode 100644 index 000000000..2d9780ebf --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_WestlakeMaziyar-7B-slerp/1762652580.003291", + "retrieved_timestamp": "1762652580.0032918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/WestlakeMaziyar-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/WestlakeMaziyar-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48377748817581795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5245479952765804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44738541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3077626329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json b/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json new file mode 100644 index 000000000..73dff62b9 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_YamMaths-7B-slerp/1762652580.003488", + "retrieved_timestamp": "1762652580.003489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/YamMaths-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/YamMaths-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148093724650594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155845857281723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43836458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3130817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json b/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json new file mode 100644 index 000000000..fee1804eb --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-1.5-34B/1762652580.0036852", + "retrieved_timestamp": "1762652580.003686", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yi-1.5-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yi-1.5-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16391618682872555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28272506287695653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10954122340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json b/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json new file mode 100644 index 000000000..c2d5fae60 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-blossom-40B/1762652580.004046", + "retrieved_timestamp": "1762652580.0040479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yi-blossom-40B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yi-blossom-40B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20088587170928693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32150442258143547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10804521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.769 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json b/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json new file mode 100644 index 000000000..4ce44f0d0 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yibuddy-35B/1762652580.004411", + "retrieved_timestamp": "1762652580.004412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yibuddy-35B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yibuddy-35B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4234774841864032 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5916185369526096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15709969788519637 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45045833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44888630319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json b/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json new file mode 100644 index 000000000..0d29f7e54 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp-34B/1762652580.0049741", + "retrieved_timestamp": "1762652580.004975", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yislerp-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yislerp-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3691970637907419 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6158722731484186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21601208459214502 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4751496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json b/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json new file mode 100644 index 000000000..d1528c2b3 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp2-34B/1762652580.005196", + "retrieved_timestamp": "1762652580.005197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yislerp2-34B", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yislerp2-34B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39994658616914236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6245771970170245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.472406914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json b/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json new file mode 100644 index 000000000..57becbd06 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yunconglong-13B-slerp/1762652580.005601", + "retrieved_timestamp": "1762652580.005603", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yunconglong-13B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/Yunconglong-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42417673993891764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5165807158493828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4160729166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30360704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json b/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json new file mode 100644 index 000000000..07cd165f2 --- /dev/null +++ b/data/hfopenllm_v2/allknowingroger/limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_limyClown-7B-slerp/1762652580.005876", + "retrieved_timestamp": "1762652580.005877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/limyClown-7B-slerp", + "developer": "allknowingroger", + "inference_platform": "unknown", + "id": "allknowingroger/limyClown-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4017451473202215 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5147517317055973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4293125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30377327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json b/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json new file mode 100644 index 000000000..3fa9328fa --- /dev/null +++ b/data/hfopenllm_v2/allura-org/L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_L3.1-8b-RP-Ink/1762652580.006678", + "retrieved_timestamp": "1762652580.006679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/L3.1-8b-RP-Ink", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/L3.1-8b-RP-Ink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7811063533646281 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48284724308518095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3608229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3427526595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json b/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json new file mode 100644 index 000000000..7378c1ed7 --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_MN-12b-RP-Ink/1762652580.006974", + "retrieved_timestamp": "1762652580.006975", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/MN-12b-RP-Ink", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/MN-12b-RP-Ink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7186332265056716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4833826588550261 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38184375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3513962765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json b/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json new file mode 100644 index 000000000..428d9e6b5 --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_MS-Meadowlark-22B/1762652580.007196", + "retrieved_timestamp": "1762652580.007197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/MS-Meadowlark-22B", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/MS-Meadowlark-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.669698621878837 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5162576933217772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38231382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json b/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json new file mode 100644 index 000000000..1f85340e5 --- /dev/null +++ b/data/hfopenllm_v2/allura-org/MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_MoE-Girl-1BA-7BT/1762652580.0080209", + "retrieved_timestamp": "1762652580.008022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/MoE-Girl-1BA-7BT", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/MoE-Girl-1BA-7BT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27050337548814923 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139175363262408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34355208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12175864361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OlmoeForCausalLM", + "params_billions": 6.919 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json new file mode 100644 index 000000000..128adaba2 --- /dev/null +++ b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Aletheia-v1/1762652580.008265", + "retrieved_timestamp": "1762652580.008276", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/TQ2.5-14B-Aletheia-v1", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/TQ2.5-14B-Aletheia-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7530297388706411 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6585074769185942 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44515625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241023936170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json new file mode 100644 index 000000000..e9c11a66c --- /dev/null +++ b/data/hfopenllm_v2/allura-org/TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Neon-v1/1762652580.0085812", + "retrieved_timestamp": "1762652580.0085819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/TQ2.5-14B-Neon-v1", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/TQ2.5-14B-Neon-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6754189993661264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.655304131044165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.461 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252659574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json b/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json new file mode 100644 index 000000000..9a3b9bf6d --- /dev/null +++ b/data/hfopenllm_v2/allura-org/Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_Teleut-7b/1762652580.008814", + "retrieved_timestamp": "1762652580.008814", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/Teleut-7b", + "developer": "allura-org", + "inference_platform": "unknown", + "id": "allura-org/Teleut-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6378752820294595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141277814496585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24093655589123866 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4640416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4130651595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json b/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json new file mode 100644 index 000000000..f9fb6355b --- /dev/null +++ b/data/hfopenllm_v2/aloobun/d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aloobun_d-SmolLM2-360M/1762652580.0092921", + "retrieved_timestamp": "1762652580.009293", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aloobun/d-SmolLM2-360M", + "developer": "aloobun", + "inference_platform": "unknown", + "id": "aloobun/d-SmolLM2-360M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20970358648386284 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3195784405636826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11693816489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json b/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json new file mode 100644 index 000000000..c6bdd758d --- /dev/null +++ b/data/hfopenllm_v2/alpindale/WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/alpindale_WizardLM-2-8x22B/1762652580.009551", + "retrieved_timestamp": "1762652580.0095518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpindale/WizardLM-2-8x22B", + "developer": "alpindale", + "inference_platform": "unknown", + "id": "alpindale/WizardLM-2-8x22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5272166739805937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6377307938917097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45960771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json b/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json new file mode 100644 index 000000000..644786870 --- /dev/null +++ b/data/hfopenllm_v2/alpindale/magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/alpindale_magnum-72b-v1/1762652580.0098088", + "retrieved_timestamp": "1762652580.00981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alpindale/magnum-72b-v1", + "developer": "alpindale", + "inference_platform": "unknown", + "id": "alpindale/magnum-72b-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7606484128778308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6982215794373214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5467918882978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json b/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json new file mode 100644 index 000000000..102b9570e --- /dev/null +++ b/data/hfopenllm_v2/altomek/YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/altomek_YiSM-34B-0rn/1762652580.010027", + "retrieved_timestamp": "1762652580.0100281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "altomek/YiSM-34B-0rn", + "developer": "altomek", + "inference_platform": "unknown", + "id": "altomek/YiSM-34B-0rn" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.428373382624769 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6140009573868866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2280966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4695811170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json b/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json new file mode 100644 index 000000000..bb64ccfe0 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v1-72b/1762652580.0112262", + "retrieved_timestamp": "1762652580.011227", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v1-72b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v1-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7606484128778308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6982215794373214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5486203457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json new file mode 100644 index 000000000..c6406bce4 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-12b/1762652580.011473", + "retrieved_timestamp": "1762652580.011474", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v2-12b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v2-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.376166349729828 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5020864013200114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41790625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31673869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json b/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json new file mode 100644 index 000000000..7171ebe2c --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-72b/1762652580.01168", + "retrieved_timestamp": "1762652580.01168", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v2-72b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v2-72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7560273407891063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7005076514129516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5456283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json b/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json new file mode 100644 index 000000000..6e6ed5991 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2.5-12b-kto/1762652580.011887", + "retrieved_timestamp": "1762652580.011888", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v2.5-12b-kto", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v2.5-12b-kto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3865576669902525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5076961186254344 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40863541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3214760638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json new file mode 100644 index 000000000..d594f30fb --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-27b-kto/1762652580.012144", + "retrieved_timestamp": "1762652580.0121448", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v3-27b-kto", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v3-27b-kto" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5674831668860845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.586040577894583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18126888217522658 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38546874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42378656914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json new file mode 100644 index 000000000..55c2d45a1 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-34b/1762652580.012352", + "retrieved_timestamp": "1762652580.012352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v3-34b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v3-34b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5115294086357531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6087828692085228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47523271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json new file mode 100644 index 000000000..3286d66f6 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-chatml/1762652580.0125592", + "retrieved_timestamp": "1762652580.0125592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v3-9b-chatml", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v3-9b-chatml" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12747066671985885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5427688488887096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4242021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json new file mode 100644 index 000000000..fb37b306a --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-12b/1762652580.013016", + "retrieved_timestamp": "1762652580.013016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v4-12b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v4-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33929640021808805 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5176693046591915 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40928125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3603723404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json new file mode 100644 index 000000000..784a61915 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-22b/1762652580.013223", + "retrieved_timestamp": "1762652580.013224", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v4-22b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v4-22b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5628620947973599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.548612004937422 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44078124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3829787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json new file mode 100644 index 000000000..6c2caa3b1 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-27b/1762652580.013432", + "retrieved_timestamp": "1762652580.013433", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v4-27b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v4-27b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34541682735142754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5867298109891389 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4379895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json b/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json new file mode 100644 index 000000000..10772be33 --- /dev/null +++ b/data/hfopenllm_v2/anthracite-org/magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-9b/1762652580.013639", + "retrieved_timestamp": "1762652580.013639", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v4-9b", + "developer": "anthracite-org", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v4-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3502628581053826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5336423991931557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45157291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3952792553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json b/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json new file mode 100644 index 000000000..a9b8f8176 --- /dev/null +++ b/data/hfopenllm_v2/anthropic/xxx777xxxASD/L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/1762652580.602767", + "retrieved_timestamp": "1762652580.602768", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", + "developer": "anthropic", + "inference_platform": "unknown", + "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6696487541944263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070848048063867 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42893749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35804521276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json b/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json new file mode 100644 index 000000000..2f8794dc3 --- /dev/null +++ b/data/hfopenllm_v2/apple/DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/apple_DCLM-7B/1762652580.0138528", + "retrieved_timestamp": "1762652580.013854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "apple/DCLM-7B", + "developer": "apple", + "inference_platform": "unknown", + "id": "apple/DCLM-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21727239280664196 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42321423668184166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3110871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "OpenLMModel", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json b/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json new file mode 100644 index 000000000..1290f6154 --- /dev/null +++ b/data/hfopenllm_v2/appvoid/arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/appvoid_arco-2-instruct/1762652580.014716", + "retrieved_timestamp": "1762652580.0147169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "appvoid/arco-2-instruct", + "developer": "appvoid", + "inference_platform": "unknown", + "id": "appvoid/arco-2-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2164479137577184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31330470624451107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34959375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11128656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json b/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json new file mode 100644 index 000000000..f3456f654 --- /dev/null +++ b/data/hfopenllm_v2/appvoid/arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/appvoid_arco-2/1762652580.014345", + "retrieved_timestamp": "1762652580.014347", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "appvoid/arco-2", + "developer": "appvoid", + "inference_platform": "unknown", + "id": "appvoid/arco-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19913717824261848 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31456676274830814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35359375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1116190159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json b/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json new file mode 100644 index 000000000..0e3cd2d5c --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Blitz/1762652580.0149639", + "retrieved_timestamp": "1762652580.014965", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Arcee-Blitz", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Arcee-Blitz" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5543435861292482 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6606628431550884 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34818731117824775 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50471875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6153590425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json b/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json new file mode 100644 index 000000000..170c6167a --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Maestro-7B-Preview/1762652580.015253", + "retrieved_timestamp": "1762652580.015254", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Arcee-Maestro-7B-Preview", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Arcee-Maestro-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2750247122080524 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4648373015709704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49924471299093653 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3885416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3039394946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json b/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json new file mode 100644 index 000000000..6536419ad --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Nova/1762652580.0154781", + "retrieved_timestamp": "1762652580.015479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Arcee-Nova", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Arcee-Nova" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7907485471881275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.694196965855899 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45616666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5452127659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json new file mode 100644 index 000000000..9cb008bd1 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.0159192", + "retrieved_timestamp": "1762652580.0159202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Arcee-Spark", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Arcee-Spark" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.571829412625168 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480864114714127 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4007604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38131648936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json new file mode 100644 index 000000000..fc8bf8838 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.015698", + "retrieved_timestamp": "1762652580.015699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Arcee-Spark", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Arcee-Spark" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5620874834328471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489474198567446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29531722054380666 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40209374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3822307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json b/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json new file mode 100644 index 000000000..b2de2bb6e --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_SuperNova-Medius/1762652580.016611", + "retrieved_timestamp": "1762652580.016612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/SuperNova-Medius", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/SuperNova-Medius" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7183584001560305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6377284463115707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4690332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5034906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json new file mode 100644 index 000000000..3700230a5 --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Lite/1762652580.0168262", + "retrieved_timestamp": "1762652580.0168269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Virtuoso-Lite", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Virtuoso-Lite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8099575792231279 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6098520975127147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25302114803625375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4595416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4440658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json new file mode 100644 index 000000000..31dfe246e --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small-v2/1762652580.0172758", + "retrieved_timestamp": "1762652580.017277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Virtuoso-Small-v2", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Virtuoso-Small-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8273181824226385 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6554097094586643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.466012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43133333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.518783244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json new file mode 100644 index 000000000..0c1451eff --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small/1762652580.017056", + "retrieved_timestamp": "1762652580.017057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Virtuoso-Small", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/Virtuoso-Small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7935211904413622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517633129454784 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43390625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json b/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json new file mode 100644 index 000000000..395b28f0f --- /dev/null +++ b/data/hfopenllm_v2/arcee-ai/raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_raspberry-3B/1762652580.017479", + "retrieved_timestamp": "1762652580.017479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/raspberry-3B", + "developer": "arcee-ai", + "inference_platform": "unknown", + "id": "arcee-ai/raspberry-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31541642840995227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42689280188827033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41232291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.285405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json b/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json new file mode 100644 index 000000000..d736355e2 --- /dev/null +++ b/data/hfopenllm_v2/argilla/notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/argilla_notus-7b-v1/1762652580.017684", + "retrieved_timestamp": "1762652580.017685", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "argilla/notus-7b-v1", + "developer": "argilla", + "inference_platform": "unknown", + "id": "argilla/notus-7b-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.508207112683236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4511857407381495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33641666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3003656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json b/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json new file mode 100644 index 000000000..72b15cf4e --- /dev/null +++ b/data/hfopenllm_v2/argilla/notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/argilla_notux-8x7b-v1/1762652580.017979", + "retrieved_timestamp": "1762652580.0179799", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "argilla/notux-8x7b-v1", + "developer": "argilla", + "inference_platform": "unknown", + "id": "argilla/notux-8x7b-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5422290633297429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363304164516353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41759375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3660239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json b/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json new file mode 100644 index 000000000..9d8d47f75 --- /dev/null +++ b/data/hfopenllm_v2/arisin/orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arisin_orca-platypus-13B-slerp/1762652580.018446", + "retrieved_timestamp": "1762652580.018446", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arisin/orca-platypus-13B-slerp", + "developer": "arisin", + "inference_platform": "unknown", + "id": "arisin/orca-platypus-13B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26718107953563214 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46306234976954946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4253125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2592253989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json b/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json new file mode 100644 index 000000000..36e941a23 --- /dev/null +++ b/data/hfopenllm_v2/arshiaafshani/Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arshiaafshani_Arsh-V1/1762652580.0186949", + "retrieved_timestamp": "1762652580.0186958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arshiaafshani/Arsh-V1", + "developer": "arshiaafshani", + "inference_platform": "unknown", + "id": "arshiaafshani/Arsh-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6043276284702368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6739657491720434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2620845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48989583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5256815159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json b/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json new file mode 100644 index 000000000..119362002 --- /dev/null +++ b/data/hfopenllm_v2/ashercn97/a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ashercn97_a1-v0.0.1/1762652580.019211", + "retrieved_timestamp": "1762652580.019212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ashercn97/a1-v0.0.1", + "developer": "ashercn97", + "inference_platform": "unknown", + "id": "ashercn97/a1-v0.0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21984445715146922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188122863232913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4119791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41647273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json b/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json new file mode 100644 index 000000000..38599401c --- /dev/null +++ b/data/hfopenllm_v2/ashercn97/a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ashercn97_a1-v002/1762652580.019455", + "retrieved_timestamp": "1762652580.019456", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ashercn97/a1-v002", + "developer": "ashercn97", + "inference_platform": "unknown", + "id": "ashercn97/a1-v002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2584631001298776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261137844506322 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41591666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41747007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json b/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json new file mode 100644 index 000000000..7b71bd5e9 --- /dev/null +++ b/data/hfopenllm_v2/assskelad/smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/assskelad_smollm2-360M-sft_SmallThoughts/1762652580.019667", + "retrieved_timestamp": "1762652580.0196679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "assskelad/smollm2-360M-sft_SmallThoughts", + "developer": "assskelad", + "inference_platform": "unknown", + "id": "assskelad/smollm2-360M-sft_SmallThoughts" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20071078072846715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149572469619188 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3395208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11818484042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json b/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json new file mode 100644 index 000000000..854c012b8 --- /dev/null +++ b/data/hfopenllm_v2/athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/1762652580.019914", + "retrieved_timestamp": "1762652580.019914", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", + "developer": "athirdpath", + "inference_platform": "unknown", + "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4521037513796726 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4939066588253951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3863958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564660904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json b/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json new file mode 100644 index 000000000..24a115bfe --- /dev/null +++ b/data/hfopenllm_v2/automerger/YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/automerger_YamshadowExperiment28-7B/1762652580.020166", + "retrieved_timestamp": "1762652580.0201669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "automerger/YamshadowExperiment28-7B", + "developer": "automerger", + "inference_platform": "unknown", + "id": "automerger/YamshadowExperiment28-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070156074770498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5150030227855061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4306145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30601728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json b/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json new file mode 100644 index 000000000..bd276659e --- /dev/null +++ b/data/hfopenllm_v2/avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/1762652580.020413", + "retrieved_timestamp": "1762652580.0204139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", + "developer": "avemio", + "inference_platform": "unknown", + "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26065954545866094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3446666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10605053191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json new file mode 100644 index 000000000..b655eb995 --- /dev/null +++ b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.0/1762652580.0222468", + "retrieved_timestamp": "1762652580.022248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "baconnier/Napoleon_24B_V0.0", + "developer": "baconnier", + "inference_platform": "unknown", + "id": "baconnier/Napoleon_24B_V0.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1801021290176731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6367110843973786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4419895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5039893617021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json new file mode 100644 index 000000000..2de000b54 --- /dev/null +++ b/data/hfopenllm_v2/baconnier/Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.2/1762652580.022489", + "retrieved_timestamp": "1762652580.022489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "baconnier/Napoleon_24B_V0.2", + "developer": "baconnier", + "inference_platform": "unknown", + "id": "baconnier/Napoleon_24B_V0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2527172347150006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5910621269874454 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4459583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4356715425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json b/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json new file mode 100644 index 000000000..b5fb9fefb --- /dev/null +++ b/data/hfopenllm_v2/baebee/7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/baebee_7B-Cetacea/1762652580.022699", + "retrieved_timestamp": "1762652580.022699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "baebee/7B-Cetacea", + "developer": "baebee", + "inference_platform": "unknown", + "id": "baebee/7B-Cetacea" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278660620486975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4757171853895546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2954621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json b/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json new file mode 100644 index 000000000..f360bc36a --- /dev/null +++ b/data/hfopenllm_v2/baebee/mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/baebee_mergekit-model_stock-nzjnheg/1762652580.022936", + "retrieved_timestamp": "1762652580.022937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "baebee/mergekit-model_stock-nzjnheg", + "developer": "baebee", + "inference_platform": "unknown", + "id": "baebee/mergekit-model_stock-nzjnheg" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48442687624392167 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287391310729729 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16767371601208458 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38466666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699301861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json b/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json new file mode 100644 index 000000000..8b09397de --- /dev/null +++ b/data/hfopenllm_v2/baebee/mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/baebee_mergekit-ties-fnjenli/1762652580.0231512", + "retrieved_timestamp": "1762652580.023152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "baebee/mergekit-ties-fnjenli", + "developer": "baebee", + "inference_platform": "unknown", + "id": "baebee/mergekit-ties-fnjenli" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19881248420856662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30236959112076134 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.002265861027190332 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4019375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11286569148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json new file mode 100644 index 000000000..4e2f8cb38 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/1762652580.023659", + "retrieved_timestamp": "1762652580.023659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36362628935668473 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5436022524587655 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41315624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3673537234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json new file mode 100644 index 000000000..324dfa648 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/1762652580.023869", + "retrieved_timestamp": "1762652580.02387", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3623773809048879 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5434355857920987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41582291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36627327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json new file mode 100644 index 000000000..dd61b416c --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/1762652580.02432", + "retrieved_timestamp": "1762652580.024322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38698209639312575 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431389316665282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41312499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3663563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json new file mode 100644 index 000000000..be9a70e87 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/1762652580.024673", + "retrieved_timestamp": "1762652580.024674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6508142838778884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5094241395384186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41762499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json new file mode 100644 index 000000000..fdc5ac35e --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/1762652580.0249128", + "retrieved_timestamp": "1762652580.024914", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3745672593163916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421932988679541 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41315624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36610704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json new file mode 100644 index 000000000..034b074cb --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/1762652580.025138", + "retrieved_timestamp": "1762652580.0251389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43656608908806416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5448909065942131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3661901595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json new file mode 100644 index 000000000..e2ff6f620 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/1762652580.0253649", + "retrieved_timestamp": "1762652580.025366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40309379114083965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.54645347832278 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41982291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3664394946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json new file mode 100644 index 000000000..e9ffa7b59 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/1762652580.025593", + "retrieved_timestamp": "1762652580.025593", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43162032296528763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5448926891254073 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648603723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json new file mode 100644 index 000000000..1b433a24f --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_Neo/1762652580.0258071", + "retrieved_timestamp": "1762652580.0258079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6249606599378538 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077574728717519 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41502083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36851728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json new file mode 100644 index 000000000..47ad61c32 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B/1762652580.02337", + "retrieved_timestamp": "1762652580.02337", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/MISCHIEVOUS-12B", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/MISCHIEVOUS-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851835352420466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5404981575206657 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3671875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json b/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json new file mode 100644 index 000000000..848eb9c3e --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_NameLess-12B-prob/1762652580.026292", + "retrieved_timestamp": "1762652580.026293", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/NameLess-12B-prob", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/NameLess-12B-prob" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6602315190361574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5158141019151304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json new file mode 100644 index 000000000..09777d04a --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.1v/1762652580.026718", + "retrieved_timestamp": "1762652580.026719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-0.1v", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-0.1v" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36574954454181574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412276004529172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41582291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json new file mode 100644 index 000000000..061f431bc --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.X.ver/1762652580.0269299", + "retrieved_timestamp": "1762652580.0269299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-0.X.ver", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37756486123485683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.541624689936422 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41982291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36710438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json new file mode 100644 index 000000000..9e4b4a2be --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-ALPHA/1762652580.0271401", + "retrieved_timestamp": "1762652580.027141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-ALPHA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-ALPHA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6365011502812536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5093679898057982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4202916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3696808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json new file mode 100644 index 000000000..ee84f26e1 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-BETA/1762652580.0273511", + "retrieved_timestamp": "1762652580.0273511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-BETA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-BETA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6720967034136092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155964285724085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36785239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json new file mode 100644 index 000000000..7e48cf625 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DELTA/1762652580.027563", + "retrieved_timestamp": "1762652580.027563", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-DELTA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-DELTA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6468924675416783 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5055418480543742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3651097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json new file mode 100644 index 000000000..fe5bcd446 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DIGAMMA/1762652580.027769", + "retrieved_timestamp": "1762652580.02777", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6429207835210575 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.506116784464076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36585771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json new file mode 100644 index 000000000..f794c7890 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-EPSILON/1762652580.0279832", + "retrieved_timestamp": "1762652580.0279832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-EPSILON", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-EPSILON" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6304560787599126 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5037995611302296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4069895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36477726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json new file mode 100644 index 000000000..bfd838bcd --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-GAMMA/1762652580.028181", + "retrieved_timestamp": "1762652580.028182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-GAMMA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-GAMMA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6361764562472019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181908355069679 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43632291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3666057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json new file mode 100644 index 000000000..3cacdb9e9 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-NEMO/1762652580.028384", + "retrieved_timestamp": "1762652580.028385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-NEMO", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-NEMO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40221944440750546 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5441680901949261 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37159242021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json new file mode 100644 index 000000000..a7c829825 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-OMEGA/1762652580.028594", + "retrieved_timestamp": "1762652580.028594", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-OMEGA", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-OMEGA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6699734482284783 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516644373777888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43232291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36768617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json new file mode 100644 index 000000000..871c3c355 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-UNION/1762652580.028806", + "retrieved_timestamp": "1762652580.028807", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B-UNION", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B-UNION" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6428709158366468 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5106643448765741 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4256875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3671875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json new file mode 100644 index 000000000..b840402a3 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B/1762652580.026504", + "retrieved_timestamp": "1762652580.026504", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37156965739792636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5436022524587655 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36785239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json new file mode 100644 index 000000000..925cada33 --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B_Razor/1762652580.029016", + "retrieved_timestamp": "1762652580.029016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/VICIOUS_MESH-12B_Razor", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/VICIOUS_MESH-12B_Razor" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37364304489864675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5447127693928118 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40915624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36685505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json b/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json new file mode 100644 index 000000000..dabbce7be --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-model_stock-zdaysvi/1762652580.029272", + "retrieved_timestamp": "1762652580.029272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/mergekit-model_stock-zdaysvi", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/mergekit-model_stock-zdaysvi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6425960894870055 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5062803896601668 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41238541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36884973404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json b/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json new file mode 100644 index 000000000..f8d280c9c --- /dev/null +++ b/data/hfopenllm_v2/bamec66557/mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-ties-sinbkow/1762652580.029482", + "retrieved_timestamp": "1762652580.029482", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/mergekit-ties-sinbkow", + "developer": "bamec66557", + "inference_platform": "unknown", + "id": "bamec66557/mergekit-ties-sinbkow" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6431956098706986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092084289828543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40447916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36028922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 6.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json b/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json new file mode 100644 index 000000000..947c8cb4f --- /dev/null +++ b/data/hfopenllm_v2/belztjti/dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/belztjti_dffghgjh/1762652580.0296938", + "retrieved_timestamp": "1762652580.029695", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "belztjti/dffghgjh", + "developer": "belztjti", + "inference_platform": "unknown", + "id": "belztjti/dffghgjh" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5784241368457914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35817085768640783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34745833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3421708776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GlmForCausalLM", + "params_billions": 9.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json b/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json new file mode 100644 index 000000000..a3f77aecf --- /dev/null +++ b/data/hfopenllm_v2/belztjti/dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/belztjti_dtfgv/1762652580.029931", + "retrieved_timestamp": "1762652580.029932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "belztjti/dtfgv", + "developer": "belztjti", + "inference_platform": "unknown", + "id": "belztjti/dtfgv" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334450369464133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32815316667476035 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15043218085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 9.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json b/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json new file mode 100644 index 000000000..bf43f33ec --- /dev/null +++ b/data/hfopenllm_v2/beowolx/CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/beowolx_CodeNinja-1.0-OpenChat-7B/1762652580.030703", + "retrieved_timestamp": "1762652580.030704", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "beowolx/CodeNinja-1.0-OpenChat-7B", + "developer": "beowolx", + "inference_platform": "unknown", + "id": "beowolx/CodeNinja-1.0-OpenChat-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446770125489258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4441338669403703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3015292553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json b/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json new file mode 100644 index 000000000..b266e095d --- /dev/null +++ b/data/hfopenllm_v2/berkeley-nest/Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/berkeley-nest_Starling-LM-7B-alpha/1762652580.030957", + "retrieved_timestamp": "1762652580.0309582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "berkeley-nest/Starling-LM-7B-alpha", + "developer": "berkeley-nest", + "inference_platform": "unknown", + "id": "berkeley-nest/Starling-LM-7B-alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480491761858536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4440065261164004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41201041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3171542553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json b/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json new file mode 100644 index 000000000..da4dce67d --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_Gunny/1762652580.031208", + "retrieved_timestamp": "1762652580.031209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/Gunny", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/Gunny" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7128629813339716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45459857092962414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35828124999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3038563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json b/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json new file mode 100644 index 000000000..91c18cdd9 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-c/71268c77-565a-401b-a51d-122060ed5945.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-c/1762652580.031654", + "retrieved_timestamp": "1762652580.0316548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/acheron-c", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/acheron-c" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19286714805604685 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30260703404313577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33821875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1171875 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json b/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json new file mode 100644 index 000000000..b7f366fb0 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-d/1762652580.031856", + "retrieved_timestamp": "1762652580.031857", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/acheron-d", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/acheron-d" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.192542454021995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139959864926003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11344747340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json b/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json new file mode 100644 index 000000000..30d367302 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m/1762652580.032056", + "retrieved_timestamp": "1762652580.032057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/acheron-m", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/acheron-m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17583123889058808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29284447696551025 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11128656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json b/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json new file mode 100644 index 000000000..97a3e1258 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron/1762652580.031447", + "retrieved_timestamp": "1762652580.031447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/acheron", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/acheron" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19831269919369493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3107918622526179 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3510520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10962433510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json b/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json new file mode 100644 index 000000000..b55af96a5 --- /dev/null +++ b/data/hfopenllm_v2/bfuzzy1/llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_llambses-1/1762652580.032492", + "retrieved_timestamp": "1762652580.032493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/llambses-1", + "developer": "bfuzzy1", + "inference_platform": "unknown", + "id": "bfuzzy1/llambses-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3553837152089788 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5046977405175623 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45290625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31399601063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json b/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json new file mode 100644 index 000000000..362361b16 --- /dev/null +++ b/data/hfopenllm_v2/bhuvneshsaini/merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bhuvneshsaini_merged_model/1762652580.032705", + "retrieved_timestamp": "1762652580.032706", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bhuvneshsaini/merged_model", + "developer": "bhuvneshsaini", + "inference_platform": "unknown", + "id": "bhuvneshsaini/merged_model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1812767900282362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3359777949071243 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14453125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.715 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json b/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json new file mode 100644 index 000000000..2b9fad2c1 --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-15b/1762652580.032956", + "retrieved_timestamp": "1762652580.0329568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigcode/starcoder2-15b", + "developer": "bigcode", + "inference_platform": "unknown", + "id": "bigcode/starcoder2-15b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2780223141265177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4447957841230437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35009375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23528922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 15.958 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json b/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json new file mode 100644 index 000000000..0d2c8119a --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-3b/1762652580.0331972", + "retrieved_timestamp": "1762652580.0331972", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigcode/starcoder2-3b", + "developer": "bigcode", + "inference_platform": "unknown", + "id": "bigcode/starcoder2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20370838264693236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35087141384601755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34345833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1636469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 3.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json b/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json new file mode 100644 index 000000000..abce2d86e --- /dev/null +++ b/data/hfopenllm_v2/bigcode/starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-7b/1762652580.0333922", + "retrieved_timestamp": "1762652580.0333922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigcode/starcoder2-7b", + "developer": "bigcode", + "inference_platform": "unknown", + "id": "bigcode/starcoder2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22091938279321088 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36609857669123036 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16422872340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 7.174 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json b/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json new file mode 100644 index 000000000..c109d586b --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b1/1762652580.033589", + "retrieved_timestamp": "1762652580.033589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigscience/bloom-1b1", + "developer": "bigscience", + "inference_platform": "unknown", + "id": "bigscience/bloom-1b1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13733781920858879 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31072762377370394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36999999999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1107878989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 1.065 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json b/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json new file mode 100644 index 000000000..accb79738 --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b7/1762652580.033839", + "retrieved_timestamp": "1762652580.033839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigscience/bloom-1b7", + "developer": "bigscience", + "inference_platform": "unknown", + "id": "bigscience/bloom-1b7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10438968603305895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.314054919904072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38857291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10862699468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 1.722 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json b/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json new file mode 100644 index 000000000..818bbd49b --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-3b/1762652580.034177", + "retrieved_timestamp": "1762652580.034179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigscience/bloom-3b", + "developer": "bigscience", + "inference_platform": "unknown", + "id": "bigscience/bloom-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1270961050013963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062918592346337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11328125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 3.003 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json b/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json new file mode 100644 index 000000000..85e4c8e5f --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-560m/1762652580.034546", + "retrieved_timestamp": "1762652580.034548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigscience/bloom-560m", + "developer": "bigscience", + "inference_platform": "unknown", + "id": "bigscience/bloom-560m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06202431769926019 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3025950541549823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4030833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11643949468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "BloomForCausalLM", + "params_billions": 0.559 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json b/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json new file mode 100644 index 000000000..b5965fbbe --- /dev/null +++ b/data/hfopenllm_v2/bigscience/bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bigscience_bloom-7b1/1762652580.0348449", + "retrieved_timestamp": "1762652580.034846", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bigscience/bloom-7b1", + "developer": "bigscience", + "inference_platform": "unknown", + "id": "bigscience/bloom-7b1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13221696210499254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3113718529627139 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34869791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11045545212765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "BloomForCausalLM", + "params_billions": 7.069 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json b/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json new file mode 100644 index 000000000..fe03a39b7 --- /dev/null +++ b/data/hfopenllm_v2/bluuwhale/L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bluuwhale_L3-SthenoMaid-8B-V1/1762652580.035146", + "retrieved_timestamp": "1762652580.035147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bluuwhale/L3-SthenoMaid-8B-V1", + "developer": "bluuwhale", + "inference_platform": "unknown", + "id": "bluuwhale/L3-SthenoMaid-8B-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7344700949037443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5218759253208048 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3656083776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json b/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json new file mode 100644 index 000000000..4969a9b60 --- /dev/null +++ b/data/hfopenllm_v2/bond005/meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bond005_meno-tiny-0.1/1762652580.035417", + "retrieved_timestamp": "1762652580.035417", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bond005/meno-tiny-0.1", + "developer": "bond005", + "inference_platform": "unknown", + "id": "bond005/meno-tiny-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45497613000172876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262909130965971 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785904255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json b/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json new file mode 100644 index 000000000..83f502037 --- /dev/null +++ b/data/hfopenllm_v2/braindao/Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B-Instruct/1762652580.040103", + "retrieved_timestamp": "1762652580.040104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/Qwen2.5-14B-Instruct", + "developer": "braindao", + "inference_platform": "unknown", + "id": "braindao/Qwen2.5-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8142539572778007 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6403640774008682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48894614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json b/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json new file mode 100644 index 000000000..9d8ebfdce --- /dev/null +++ b/data/hfopenllm_v2/braindao/iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/braindao_iq-code-evmind-0.5b/1762652580.0403671", + "retrieved_timestamp": "1762652580.040368", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "braindao/iq-code-evmind-0.5b", + "developer": "braindao", + "inference_platform": "unknown", + "id": "braindao/iq-code-evmind-0.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3215612353001148 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31637440507987097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33037500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11893284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json b/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json new file mode 100644 index 000000000..3f85e2a86 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/1762652580.040573", + "retrieved_timestamp": "1762652580.0405738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32893057088525113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5458008312900208 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933676861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json b/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json new file mode 100644 index 000000000..46cb1318a --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/1762652580.040823", + "retrieved_timestamp": "1762652580.0408242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.567708125551315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5607195549186694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43563541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45054853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json b/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json new file mode 100644 index 000000000..5b5056a7b --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_3Blareneg-ECE-PRYMMAL-Martial/1762652580.041033", + "retrieved_timestamp": "1762652580.041034", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28763902002242936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.535846215598753 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4015957446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json b/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json new file mode 100644 index 000000000..eb2ce0e5d --- /dev/null +++ b/data/hfopenllm_v2/brgx53/3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/1762652580.04124", + "retrieved_timestamp": "1762652580.04124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5661843907498769 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5607195549186694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43563541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45054853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json b/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json new file mode 100644 index 000000000..38954e577 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_Barracuda-PRYMMAL-ECE-TW3/1762652580.041505", + "retrieved_timestamp": "1762652580.041506", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/Barracuda-PRYMMAL-ECE-TW3", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16401592219754696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30024599561514337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36085416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10929188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json b/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json new file mode 100644 index 000000000..912079853 --- /dev/null +++ b/data/hfopenllm_v2/brgx53/LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/brgx53_LaConfiance-PRYMMAL-ECE-TW3/1762652580.041717", + "retrieved_timestamp": "1762652580.041717", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", + "developer": "brgx53", + "inference_platform": "unknown", + "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1579209829917951 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29624186550380993 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38457291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11461103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json new file mode 100644 index 000000000..a84739d6a --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.0/1762652580.0421708", + "retrieved_timestamp": "1762652580.042172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Blabbertron-1.0", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Blabbertron-1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7433376773627309 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5496552006589083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4336875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4354222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json b/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json new file mode 100644 index 000000000..b1a0cdba9 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.1/1762652580.0424142", + "retrieved_timestamp": "1762652580.0424151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Blabbertron-1.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Blabbertron-1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7265267268625026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5534000697428705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44306848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json new file mode 100644 index 000000000..ff9f210af --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v1/1762652580.043317", + "retrieved_timestamp": "1762652580.043317", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/DeepThinker-7B-Sce-v1", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/DeepThinker-7B-Sce-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12180015691698028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30182806791122846 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json new file mode 100644 index 000000000..2b800219f --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v2/1762652580.0435221", + "retrieved_timestamp": "1762652580.043523", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/DeepThinker-7B-Sce-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/DeepThinker-7B-Sce-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16306621985221434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3056842322947901 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4100625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11461103723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json b/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json new file mode 100644 index 000000000..a4a6c1f0e --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/1762652580.043724", + "retrieved_timestamp": "1762652580.043725", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7019220113742648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5517973725429837 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43367686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json b/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json new file mode 100644 index 000000000..703ab0684 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_FuseQwQen-7B/1762652580.0439281", + "retrieved_timestamp": "1762652580.043929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/FuseQwQen-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/FuseQwQen-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274509412802475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5504256932515404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43655589123867067 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4406582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json b/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json new file mode 100644 index 000000000..ad156157d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Maestro-S1k-7B-Sce/1762652580.048955", + "retrieved_timestamp": "1762652580.048955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Maestro-S1k-7B-Sce", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Maestro-S1k-7B-Sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2522684255553044 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104380842714463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11702127659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json b/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json new file mode 100644 index 000000000..23b76e19d --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qandora-2.5-7B-Creative/1762652580.0529459", + "retrieved_timestamp": "1762652580.052947", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qandora-2.5-7B-Creative", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Qandora-2.5-7B-Creative" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6803148978044922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5541763892398439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30589123867069484 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4211875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4479720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json new file mode 100644 index 000000000..077d5930a --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-Persona/1762652580.0533981", + "retrieved_timestamp": "1762652580.053399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QandoraExp-7B-Persona", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/QandoraExp-7B-Persona" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6246858335882126 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5558337526959515 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104229607250755 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43715624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44074135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json new file mode 100644 index 000000000..132764109 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-v2/1762652580.053621", + "retrieved_timestamp": "1762652580.053621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QandoraExp-7B-v2", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/QandoraExp-7B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5606889719278182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5444864824489132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47129909365558914 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40454166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390874335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json b/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json new file mode 100644 index 000000000..92af2878e --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B/1762652580.0531762", + "retrieved_timestamp": "1762652580.0531762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QandoraExp-7B", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/QandoraExp-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5477959748047708 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4743202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43120833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json new file mode 100644 index 000000000..63fd5cf9f --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT-R1/1762652580.05408", + "retrieved_timestamp": "1762652580.054081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QwQen-3B-LCoT-R1", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/QwQen-3B-LCoT-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.534160471992092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4798600168403517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33534743202416917 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723404255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.085 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json new file mode 100644 index 000000000..3652835b5 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT/1762652580.05384", + "retrieved_timestamp": "1762652580.0538409", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/QwQen-3B-LCoT", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/QwQen-3B-LCoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6025290673191577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4899306773152123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178247734138974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699301861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json new file mode 100644 index 000000000..fb0f96fbd --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Fusion/1762652580.0585442", + "retrieved_timestamp": "1762652580.0585449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-Instruct-Fusion", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6962016338869754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5491903018724945 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4467253989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json new file mode 100644 index 000000000..04533db90 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/1762652580.0587678", + "retrieved_timestamp": "1762652580.058769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5529431709465797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48942598187311176 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42311458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4383311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json new file mode 100644 index 000000000..f8892d268 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7-Persona/1762652580.062155", + "retrieved_timestamp": "1762652580.062156", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/SmolLM2-1.7-Persona", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/SmolLM2-1.7-Persona" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465254413844156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3623213930905173 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1973902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json new file mode 100644 index 000000000..8c4d7eec6 --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7B-roleplay-lora/1762652580.062429", + "retrieved_timestamp": "1762652580.06243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/SmolLM2-1.7B-roleplay-lora", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/SmolLM2-1.7B-roleplay-lora" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5382075116247114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3610343412303005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33945833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19664228723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 3.423 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json b/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json new file mode 100644 index 000000000..0de3813bc --- /dev/null +++ b/data/hfopenllm_v2/bunnycore/Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Tulu-3.1-8B-SuperNova/1762652580.062763", + "retrieved_timestamp": "1762652580.0627651", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Tulu-3.1-8B-SuperNova", + "developer": "bunnycore", + "inference_platform": "unknown", + "id": "bunnycore/Tulu-3.1-8B-SuperNova" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8193748143813969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5254122754311122 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3813996010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json b/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json new file mode 100644 index 000000000..0b0eb1a5a --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/byroneverson_Mistral-Small-Instruct-2409-abliterated/1762652580.063036", + "retrieved_timestamp": "1762652580.063037", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "byroneverson/Mistral-Small-Instruct-2409-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6970759806203096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5237864400325174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36971875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39228723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json new file mode 100644 index 000000000..1e0526c1d --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/1762652580.068388", + "retrieved_timestamp": "1762652580.068392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5528453392553979 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5282050829986801 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4734375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38231382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json new file mode 100644 index 000000000..985fb6bf8 --- /dev/null +++ b/data/hfopenllm_v2/byroneverson/Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-abliterated/1762652580.070213", + "retrieved_timestamp": "1762652580.070215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "byroneverson/Yi-1.5-9B-Chat-abliterated", + "developer": "byroneverson", + "inference_platform": "unknown", + "id": "byroneverson/Yi-1.5-9B-Chat-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5723291976400395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5401219363002313 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43886458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3715093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json b/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json new file mode 100644 index 000000000..5680463d0 --- /dev/null +++ b/data/hfopenllm_v2/c10x/Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/c10x_Q-Pluse/1762652580.070795", + "retrieved_timestamp": "1762652580.070796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c10x/Q-Pluse", + "developer": "c10x", + "inference_platform": "unknown", + "id": "c10x/Q-Pluse" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228318638988993 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2875111436321769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39381249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11353058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json b/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json new file mode 100644 index 000000000..325dee744 --- /dev/null +++ b/data/hfopenllm_v2/c10x/longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/c10x_longthinker/1762652580.078971", + "retrieved_timestamp": "1762652580.078974", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "c10x/longthinker", + "developer": "c10x", + "inference_platform": "unknown", + "id": "c10x/longthinker" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36087913403103766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49274888053364546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23187311178247735 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3909583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527260638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json b/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json new file mode 100644 index 000000000..2843e36d2 --- /dev/null +++ b/data/hfopenllm_v2/carsenk/flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/carsenk_flippa-v6/1762652580.079394", + "retrieved_timestamp": "1762652580.079395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "carsenk/flippa-v6", + "developer": "carsenk", + "inference_platform": "unknown", + "id": "carsenk/flippa-v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3439429602344003 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5046972457053399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1404833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40887500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3667719414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json b/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json new file mode 100644 index 000000000..4b82dee13 --- /dev/null +++ b/data/hfopenllm_v2/cgato/TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cgato_TheSalt-L3-8b-v0.3.2/1762652580.100134", + "retrieved_timestamp": "1762652580.100136", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cgato/TheSalt-L3-8b-v0.3.2", + "developer": "cgato", + "inference_platform": "unknown", + "id": "cgato/TheSalt-L3-8b-v0.3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27050337548814923 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29679653176003074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38962499999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11394614361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json b/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json new file mode 100644 index 000000000..ec1f92016 --- /dev/null +++ b/data/hfopenllm_v2/chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/1762652580.1008909", + "retrieved_timestamp": "1762652580.100893", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", + "developer": "chujiezheng", + "inference_platform": "unknown", + "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6433707008515184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4764515968840137 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.340093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json b/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json new file mode 100644 index 000000000..507e6b261 --- /dev/null +++ b/data/hfopenllm_v2/cjvt/GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cjvt_GaMS-1B/1762652580.101496", + "retrieved_timestamp": "1762652580.1014972", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cjvt/GaMS-1B", + "developer": "cjvt", + "inference_platform": "unknown", + "id": "cjvt/GaMS-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.163541625110263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3074752552734472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36841666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11486037234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 1.54 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json b/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json new file mode 100644 index 000000000..d617801d9 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_11Bx2_MoE_19B/1762652580.102268", + "retrieved_timestamp": "1762652580.102269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Mixtral_11Bx2_MoE_19B", + "developer": "cloudyu", + "inference_platform": "unknown", + "id": "cloudyu/Mixtral_11Bx2_MoE_19B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850837998732253 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5208516020145867 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33111702127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 19.188 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json b/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json new file mode 100644 index 000000000..8e537411a --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_34Bx2_MoE_60B/1762652580.102543", + "retrieved_timestamp": "1762652580.1025438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Mixtral_34Bx2_MoE_60B", + "developer": "cloudyu", + "inference_platform": "unknown", + "id": "cloudyu/Mixtral_34Bx2_MoE_60B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4537770892343427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5869701263465353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4625208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47664561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json b/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json new file mode 100644 index 000000000..584a9d460 --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_7Bx2_MoE/1762652580.102766", + "retrieved_timestamp": "1762652580.102767", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Mixtral_7Bx2_MoE", + "developer": "cloudyu", + "inference_platform": "unknown", + "id": "cloudyu/Mixtral_7Bx2_MoE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4480068440626427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5159732691655027 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44729166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30435505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json b/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json new file mode 100644 index 000000000..4c50c883b --- /dev/null +++ b/data/hfopenllm_v2/cloudyu/Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Yi-34Bx2-MoE-60B-DPO/1762652580.108832", + "retrieved_timestamp": "1762652580.1088362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Yi-34Bx2-MoE-60B-DPO", + "developer": "cloudyu", + "inference_platform": "unknown", + "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531887613753729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516831447641953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46766954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 60.814 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json b/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json new file mode 100644 index 000000000..0d50fb2bc --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024-instruct/1762652580.116854", + "retrieved_timestamp": "1762652580.116854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cpayne1303/cp2024-instruct", + "developer": "cpayne1303", + "inference_platform": "unknown", + "id": "cpayne1303/cp2024-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17061064641817045 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2946778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json b/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json new file mode 100644 index 000000000..5e4f63731 --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024/1762652580.116582", + "retrieved_timestamp": "1762652580.1165829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cpayne1303/cp2024", + "developer": "cpayne1303", + "inference_platform": "unknown", + "id": "cpayne1303/cp2024" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16581448334862608 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29853854089245085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3383125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11012300531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json b/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json new file mode 100644 index 000000000..295373ed1 --- /dev/null +++ b/data/hfopenllm_v2/cpayne1303/smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cpayne1303_smallcp2024/1762652580.117528", + "retrieved_timestamp": "1762652580.117528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cpayne1303/smallcp2024", + "developer": "cpayne1303", + "inference_platform": "unknown", + "id": "cpayne1303/smallcp2024" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1581958093414363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3027047714604053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23070469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34246874999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11136968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.002 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json b/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json new file mode 100644 index 000000000..014effd43 --- /dev/null +++ b/data/hfopenllm_v2/crestf411/MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/crestf411_MN-Slush/1762652580.117737", + "retrieved_timestamp": "1762652580.117738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "crestf411/MN-Slush", + "developer": "crestf411", + "inference_platform": "unknown", + "id": "crestf411/MN-Slush" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077148632295642 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5340014235282594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39328125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3508144946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json b/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json new file mode 100644 index 000000000..ed290f1f1 --- /dev/null +++ b/data/hfopenllm_v2/cyberagent/calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cyberagent_calm3-22b-chat/1762652580.118237", + "retrieved_timestamp": "1762652580.118238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cyberagent/calm3-22b-chat", + "developer": "cyberagent", + "inference_platform": "unknown", + "id": "cyberagent/calm3-22b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.509131327100981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4991683247746046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45532291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29496343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 22.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json b/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json new file mode 100644 index 000000000..66e7450af --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassNeverSleeps/1762652580.1184928", + "retrieved_timestamp": "1762652580.118494", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "darkc0de/BuddyGlassNeverSleeps", + "developer": "darkc0de", + "inference_platform": "unknown", + "id": "darkc0de/BuddyGlassNeverSleeps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4239019135892764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49772281653646816 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3992708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34524601063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json b/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json new file mode 100644 index 000000000..f2d90db12 --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassUncensored2025.2/1762652580.118735", + "retrieved_timestamp": "1762652580.1187358", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "darkc0de/BuddyGlassUncensored2025.2", + "developer": "darkc0de", + "inference_platform": "unknown", + "id": "darkc0de/BuddyGlassUncensored2025.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7731131176389756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6095411371819216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24018126888217523 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43359375 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json b/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json new file mode 100644 index 000000000..5149d1b9e --- /dev/null +++ b/data/hfopenllm_v2/darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/1762652580.1189609", + "retrieved_timestamp": "1762652580.1189609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", + "developer": "darkc0de", + "inference_platform": "unknown", + "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43584245357872664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5243087998656722 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36727061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.007 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json b/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json new file mode 100644 index 000000000..e09c2c814 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dbrx-base/17febb53-0735-4983-8049-85319818ab84.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dbrx-base/1762652580.1191711", + "retrieved_timestamp": "1762652580.1191711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dbrx-base", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dbrx-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08214723926380368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195833333333334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32666666666666666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4066666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json b/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json new file mode 100644 index 000000000..73288ef79 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dbrx-instruct/1762652580.119466", + "retrieved_timestamp": "1762652580.119467", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dbrx-instruct", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dbrx-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415796752616391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5428960796934387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42692708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "DbrxForCausalLM", + "params_billions": 131.597 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json b/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json new file mode 100644 index 000000000..2d277c2d8 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v1-6b/1762652580.1196742", + "retrieved_timestamp": "1762652580.119675", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dolly-v1-6b", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dolly-v1-6b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22244311759464885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3172089528774696 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40041666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12657912234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json b/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json new file mode 100644 index 000000000..6e9ddbcf4 --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-12b/1762652580.1198819", + "retrieved_timestamp": "1762652580.119883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dolly-v2-12b", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dolly-v2-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23550734273948679 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33199731673771277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11286569148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json b/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json new file mode 100644 index 000000000..389c515ef --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-3b/1762652580.1200871", + "retrieved_timestamp": "1762652580.1200871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dolly-v2-3b", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dolly-v2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22471597583301195 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30792785961544844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11452792553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json b/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json new file mode 100644 index 000000000..83c7f17df --- /dev/null +++ b/data/hfopenllm_v2/databricks/dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-7b/1762652580.120286", + "retrieved_timestamp": "1762652580.120287", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "databricks/dolly-v2-7b", + "developer": "databricks", + "inference_platform": "unknown", + "id": "databricks/dolly-v2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2009856070781083 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31730628122070326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35530208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1149434840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json b/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json new file mode 100644 index 000000000..91c621166 --- /dev/null +++ b/data/hfopenllm_v2/davidkim205/Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/davidkim205_Rhea-72b-v0.5/1762652580.1208682", + "retrieved_timestamp": "1762652580.1208699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "davidkim205/Rhea-72b-v0.5", + "developer": "davidkim205", + "inference_platform": "unknown", + "id": "davidkim205/Rhea-72b-v0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014538092261865185 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30783395929068597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42413541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json b/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json new file mode 100644 index 000000000..31d77ef78 --- /dev/null +++ b/data/hfopenllm_v2/davidkim205/nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/davidkim205_nox-solar-10.7b-v4/1762652580.1212", + "retrieved_timestamp": "1762652580.1212008", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "davidkim205/nox-solar-10.7b-v4", + "developer": "davidkim205", + "inference_platform": "unknown", + "id": "davidkim205/nox-solar-10.7b-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753418706809044 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4814038018918371 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42984375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3332779255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json new file mode 100644 index 000000000..056c4fa86 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-67b-chat/1762652580.1230679", + "retrieved_timestamp": "1762652580.1230688", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/deepseek-llm-67b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "id": "deepseek-ai/deepseek-llm-67b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5587153197959193 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5243416179742358 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3943650265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 67.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json new file mode 100644 index 000000000..47dcc8706 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-base/1762652580.1234062", + "retrieved_timestamp": "1762652580.1234071", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/deepseek-llm-7b-base", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "id": "deepseek-ai/deepseek-llm-7b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.217871913190335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35030315829299524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37378124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18060172872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json new file mode 100644 index 000000000..aade62d71 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-chat/1762652580.123629", + "retrieved_timestamp": "1762652580.12363", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/deepseek-llm-7b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "id": "deepseek-ai/deepseek-llm-7b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4170822307034225 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3632079760108669 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46677083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21334773936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json new file mode 100644 index 000000000..01dd2b9ec --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-base/1762652580.123848", + "retrieved_timestamp": "1762652580.123849", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/deepseek-moe-16b-base", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "id": "deepseek-ai/deepseek-moe-16b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2449744455821664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3409461055246395 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36578125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1505152925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "DeepseekForCausalLM", + "params_billions": 16.376 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json new file mode 100644 index 000000000..ba3087ba1 --- /dev/null +++ b/data/hfopenllm_v2/deepseek-ai/deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-chat/1762652580.1240609", + "retrieved_timestamp": "1762652580.124062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/deepseek-moe-16b-chat", + "developer": "deepseek-ai", + "inference_platform": "unknown", + "id": "deepseek-ai/deepseek-moe-16b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36629919724109805 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3274953026448241 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22483221476510068 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38076041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1963929521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "DeepseekForCausalLM", + "params_billions": 16.376 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json b/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json new file mode 100644 index 000000000..25f02e5af --- /dev/null +++ b/data/hfopenllm_v2/dfurman/CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dfurman_CalmeRys-78B-Orpo-v0.1/1762652580.124436", + "retrieved_timestamp": "1762652580.124437", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dfurman/CalmeRys-78B-Orpo-v0.1", + "developer": "dfurman", + "inference_platform": "unknown", + "id": "dfurman/CalmeRys-78B-Orpo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8163273447785211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7262282792249927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4001677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5901770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7012134308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json b/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json new file mode 100644 index 000000000..292a1e9e3 --- /dev/null +++ b/data/hfopenllm_v2/dicta-il/dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0-instruct/1762652580.126274", + "retrieved_timestamp": "1762652580.126276", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dicta-il/dictalm2.0-instruct", + "developer": "dicta-il", + "inference_platform": "unknown", + "id": "dicta-il/dictalm2.0-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44121264910437635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42560784985912875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39458333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2604720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.251 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json b/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json new file mode 100644 index 000000000..56fed06a5 --- /dev/null +++ b/data/hfopenllm_v2/dicta-il/dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0/1762652580.125907", + "retrieved_timestamp": "1762652580.125909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dicta-il/dictalm2.0", + "developer": "dicta-il", + "inference_platform": "unknown", + "id": "dicta-il/dictalm2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24132745559559746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4017869112495909 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38196874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2604720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.251 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json b/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json new file mode 100644 index 000000000..443b8a915 --- /dev/null +++ b/data/hfopenllm_v2/divyanshukunwar/SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/divyanshukunwar_SASTRI_1_9B/1762652580.1269271", + "retrieved_timestamp": "1762652580.1269279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "divyanshukunwar/SASTRI_1_9B", + "developer": "divyanshukunwar", + "inference_platform": "unknown", + "id": "divyanshukunwar/SASTRI_1_9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207292206899914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4680499051118341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187333776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 5.211 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json new file mode 100644 index 000000000..e9a4c922f --- /dev/null +++ b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/1762652580.131253", + "retrieved_timestamp": "1762652580.131254", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", + "developer": "djuna-test-lab", + "inference_platform": "unknown", + "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.635252241829457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.449540552927623 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31258311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json new file mode 100644 index 000000000..1cea8806e --- /dev/null +++ b/data/hfopenllm_v2/djuna-test-lab/TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B/1762652580.131", + "retrieved_timestamp": "1762652580.131001", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna-test-lab/TEST-L3.2-ReWish-3B", + "developer": "djuna-test-lab", + "inference_platform": "unknown", + "id": "djuna-test-lab/TEST-L3.2-ReWish-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6367759766308949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.449540552927623 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31258311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json b/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json new file mode 100644 index 000000000..03d916782 --- /dev/null +++ b/data/hfopenllm_v2/djuna/G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_G2-BigGSHT-27B-2/1762652580.1272058", + "retrieved_timestamp": "1762652580.1272068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/G2-BigGSHT-27B-2", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/G2-BigGSHT-27B-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7974430067775724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.641474454273013 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348942598187311 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40720833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45279255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json b/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json new file mode 100644 index 000000000..166684ee1 --- /dev/null +++ b/data/hfopenllm_v2/djuna/G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_G2-GSHT/1762652580.127527", + "retrieved_timestamp": "1762652580.127528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/G2-GSHT", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/G2-GSHT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5630116978505919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5269730491270207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40057291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070146276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json b/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json new file mode 100644 index 000000000..cd6dead2f --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-ForStHS/1762652580.128124", + "retrieved_timestamp": "1762652580.128125", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/L3.1-ForStHS", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/L3.1-ForStHS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7813313120298586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5202703381267152 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15030211480362538 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40264583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37350398936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json new file mode 100644 index 000000000..bc7e36f8f --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/1762652580.1283488", + "retrieved_timestamp": "1762652580.12835", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7235291249440374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5432920704935255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390375664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json new file mode 100644 index 000000000..52c4e55aa --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-calc/1762652580.128573", + "retrieved_timestamp": "1762652580.128574", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/L3.1-Promissum_Mane-8B-Della-calc", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.544152847777231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.548587625935678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18429003021148035 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4229895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3801529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json b/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json new file mode 100644 index 000000000..ca8de8972 --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Purosani-2-8B/1762652580.128782", + "retrieved_timestamp": "1762652580.128783", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/L3.1-Purosani-2-8B", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/L3.1-Purosani-2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4988153654525548 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5182122256069372 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38162499999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3751662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json b/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json new file mode 100644 index 000000000..ed3eadb3c --- /dev/null +++ b/data/hfopenllm_v2/djuna/L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_L3.1-Suze-Vume-calc/1762652580.128992", + "retrieved_timestamp": "1762652580.128992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/L3.1-Suze-Vume-calc", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/L3.1-Suze-Vume-calc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7296739318341999 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516421105092519 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38429166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35147938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json new file mode 100644 index 000000000..15cc006f7 --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-2/1762652580.129499", + "retrieved_timestamp": "1762652580.1295", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/MN-Chinofun-12B-2", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/MN-Chinofun-12B-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6170671595810228 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5036959998266032 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42683333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json new file mode 100644 index 000000000..8c1b94074 --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-3/1762652580.129836", + "retrieved_timestamp": "1762652580.129837", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/MN-Chinofun-12B-3", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/MN-Chinofun-12B-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3052744495715812 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.53478574603334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3026097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json new file mode 100644 index 000000000..a90ff54cc --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-4/1762652580.13009", + "retrieved_timestamp": "1762652580.130091", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/MN-Chinofun-12B-4", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/MN-Chinofun-12B-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5404305021786637 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347693369790583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4306770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3497340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json b/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json new file mode 100644 index 000000000..c2b824b9b --- /dev/null +++ b/data/hfopenllm_v2/djuna/MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun/1762652580.1291971", + "retrieved_timestamp": "1762652580.1291971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/MN-Chinofun", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/MN-Chinofun" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6110220880596817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49527033812671534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40835416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36028922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json b/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json new file mode 100644 index 000000000..bf2114f9a --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Partron-7B/1762652580.130363", + "retrieved_timestamp": "1762652580.130364", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/Q2.5-Partron-7B", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/Q2.5-Partron-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7321218810533828 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5418474850726388 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4826283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41654166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json new file mode 100644 index 000000000..f99e4a9ce --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B-0.5/1762652580.13079", + "retrieved_timestamp": "1762652580.130791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/Q2.5-Veltha-14B-0.5", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/Q2.5-Veltha-14B-0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7795826185631901 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6523026688308357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43390625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5295046542553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json new file mode 100644 index 000000000..1b7da584c --- /dev/null +++ b/data/hfopenllm_v2/djuna/Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B/1762652580.130576", + "retrieved_timestamp": "1762652580.1305768", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/Q2.5-Veltha-14B", + "developer": "djuna", + "inference_platform": "unknown", + "id": "djuna/Q2.5-Veltha-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8291666112581284 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.648421390292023 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4788519637462236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5298371010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json new file mode 100644 index 000000000..799da164e --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-8B-Instruct/1762652580.131744", + "retrieved_timestamp": "1762652580.131744", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Llama-3-8B-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Llama-3-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6957772044841022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4808708123069005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33834375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.355718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json new file mode 100644 index 000000000..431b46ae7 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Huge-Instruct/1762652580.1319628", + "retrieved_timestamp": "1762652580.131964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Llama-3-Huge-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Llama-3-Huge-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7685917809190725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6480872171360044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22885196374622357 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510970744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 99.646 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json new file mode 100644 index 000000000..086883e91 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Large-Instruct/1762652580.132239", + "retrieved_timestamp": "1762652580.132241", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Llama-3-Large-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Llama-3-Large-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8050616807847621 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.65252690724939 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5137134308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 73.976 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json b/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json new file mode 100644 index 000000000..8031673a6 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3.1-8B-Instruct/1762652580.132753", + "retrieved_timestamp": "1762652580.1327538", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Llama-3.1-8B-Instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7684920455502511 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5163645317446665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36394614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 8.685 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json b/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json new file mode 100644 index 000000000..c65070ff8 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Medium/1762652580.131469", + "retrieved_timestamp": "1762652580.13147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Medium", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Medium" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4406131287206833 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6284726872432828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40692708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325964095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 18.731 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json b/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json new file mode 100644 index 000000000..3f8b02d4c --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Phi-3-medium-4k-instruct/1762652580.133586", + "retrieved_timestamp": "1762652580.133587", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Phi-3-medium-4k-instruct", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Phi-3-medium-4k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4391392616036561 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6226313539198264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1608761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42528125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.484624335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 17.709 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json new file mode 100644 index 000000000..133d4499a --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge-base/1762652580.134071", + "retrieved_timestamp": "1762652580.134072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-XLarge-base", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-XLarge-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7910233735377686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7047291858548728 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4902708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5430518617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.972 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json new file mode 100644 index 000000000..a390a50e0 --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge/1762652580.1338398", + "retrieved_timestamp": "1762652580.1338408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-XLarge", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-XLarge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7995662619627034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7050033079850099 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49696875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5428025265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json b/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json new file mode 100644 index 000000000..9b7340a3a --- /dev/null +++ b/data/hfopenllm_v2/dnhkng/RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge2/1762652580.1343", + "retrieved_timestamp": "1762652580.134301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-XLarge2", + "developer": "dnhkng", + "inference_platform": "unknown", + "id": "dnhkng/RYS-XLarge2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49019712141562166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6573947106260754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27492447129909364 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 77.965 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json b/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json new file mode 100644 index 000000000..bc1329462 --- /dev/null +++ b/data/hfopenllm_v2/dreamgen/WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dreamgen_WizardLM-2-7B/1762652580.1345458", + "retrieved_timestamp": "1762652580.134547", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dreamgen/WizardLM-2-7B", + "developer": "dreamgen", + "inference_platform": "unknown", + "id": "dreamgen/WizardLM-2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45829842595424586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34867856163972016 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39409374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2660405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json new file mode 100644 index 000000000..1d0c653f5 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v1/1762652580.134872", + "retrieved_timestamp": "1762652580.134874", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v1", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657750324694034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4663596290293861 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3384308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json new file mode 100644 index 000000000..3e3a84976 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v2/1762652580.135156", + "retrieved_timestamp": "1762652580.135157", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v2", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3912042270065648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47238018945807153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3526354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3377659574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json new file mode 100644 index 000000000..2cd71cf08 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v3/1762652580.1353788", + "retrieved_timestamp": "1762652580.1353788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v3", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536733644507684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4658310598309874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35117708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35480385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json new file mode 100644 index 000000000..c07ced2a0 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v4/1762652580.135605", + "retrieved_timestamp": "1762652580.135605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v4", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697890486132351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46860140660011185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23406040268456377 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33930208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3390126329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json new file mode 100644 index 000000000..bbda0bc1b --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v5/1762652580.135817", + "retrieved_timestamp": "1762652580.135818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v5", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42375231053604434 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4781685533183147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33536458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json new file mode 100644 index 000000000..6d5af7c44 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v6/1762652580.136029", + "retrieved_timestamp": "1762652580.13603", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v6", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4938939790866014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4809537068664902 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.347905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json new file mode 100644 index 000000000..d5b8689e3 --- /dev/null +++ b/data/hfopenllm_v2/dustinwloring1988/Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v7/1762652580.1362429", + "retrieved_timestamp": "1762652580.136244", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dustinwloring1988/Reflexis-8b-chat-v7", + "developer": "dustinwloring1988", + "inference_platform": "unknown", + "id": "dustinwloring1988/Reflexis-8b-chat-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39804828964924177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4809830787114964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3642785904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json new file mode 100644 index 000000000..84a242d49 --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-instruct/1762652580.137409", + "retrieved_timestamp": "1762652580.1374102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gemma-2-2b-id-instruct", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "id": "dwikitheduck/gemma-2-2b-id-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38785644312646006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39621721241423097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41542708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21733710106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json b/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json new file mode 100644 index 000000000..571bbba1d --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-inst-1/1762652580.1376698", + "retrieved_timestamp": "1762652580.137671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gen-inst-1", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "id": "dwikitheduck/gen-inst-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7750114141588762 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6419926671215591 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4554380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42054166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088929521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json b/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json new file mode 100644 index 000000000..0e1da429d --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1-notemp/1762652580.13809", + "retrieved_timestamp": "1762652580.138091", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gen-try1-notemp", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "id": "dwikitheduck/gen-try1-notemp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26270961050013963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.626267088306491 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31797583081570996 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47141666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5210272606382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json b/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json new file mode 100644 index 000000000..e7d8115ff --- /dev/null +++ b/data/hfopenllm_v2/dwikitheduck/gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1/1762652580.137886", + "retrieved_timestamp": "1762652580.137887", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gen-try1", + "developer": "dwikitheduck", + "inference_platform": "unknown", + "id": "dwikitheduck/gen-try1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7522052598217175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6358510933470735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4415625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110538563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json b/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json new file mode 100644 index 000000000..e3f7597ba --- /dev/null +++ b/data/hfopenllm_v2/dzakwan/dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dzakwan_dzakwan-MoE-4x7b-Beta/1762652580.138297", + "retrieved_timestamp": "1762652580.138298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dzakwan/dzakwan-MoE-4x7b-Beta", + "developer": "dzakwan", + "inference_platform": "unknown", + "id": "dzakwan/dzakwan-MoE-4x7b-Beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44426011870725235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.514044131159397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42673958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3107546542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json b/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json new file mode 100644 index 000000000..afd65e0fb --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-8B-Franken-Basestruct/1762652580.138562", + "retrieved_timestamp": "1762652580.1385632", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/Falcon3-8B-Franken-Basestruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/Falcon3-8B-Franken-Basestruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17148499315150467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5462828074770284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3554895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946974734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.406 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json b/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json new file mode 100644 index 000000000..6206b3a6c --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-MoE-2x7B-Insruct/1762652580.1388721", + "retrieved_timestamp": "1762652580.138873", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/Falcon3-MoE-2x7B-Insruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7642954028643998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.564789641564995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4123867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4840416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.401 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json b/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json new file mode 100644 index 000000000..67da08543 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_SoRu-0009/1762652580.1407459", + "retrieved_timestamp": "1762652580.140747", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/SoRu-0009", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/SoRu-0009" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25818827378023645 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149981683579724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3369479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12391954787234043 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json b/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json new file mode 100644 index 000000000..bf0d712f7 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_della-70b-test-v1/1762652580.141174", + "retrieved_timestamp": "1762652580.141175", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/della-70b-test-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/della-70b-test-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49786566310722213 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3029452113782393 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45545833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1574966755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json b/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json new file mode 100644 index 000000000..5844410db --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_falcon3-ultraset/1762652580.1413918", + "retrieved_timestamp": "1762652580.141393", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/falcon3-ultraset", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/falcon3-ultraset" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7135123694020753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5583684420918801 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48531250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.398188164893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json new file mode 100644 index 000000000..00b66f448 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-16x32/1762652580.141611", + "retrieved_timestamp": "1762652580.141612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fd-lora-merged-16x32", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fd-lora-merged-16x32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3480897352358409 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3307564619842368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35142708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12051196808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.776 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json new file mode 100644 index 000000000..28ae169b1 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-64x128/1762652580.14183", + "retrieved_timestamp": "1762652580.141831", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fd-lora-merged-64x128", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fd-lora-merged-64x128" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3281060918363276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33447107385638297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15367353723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json new file mode 100644 index 000000000..1027f1854 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-it-v1/1762652580.1420429", + "retrieved_timestamp": "1762652580.1420438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fp4-14b-it-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fp4-14b-it-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25346746632269046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5739715511094247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35948958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4204621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json b/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json new file mode 100644 index 000000000..48602e406 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-v1-fix/1762652580.142252", + "retrieved_timestamp": "1762652580.1422532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fp4-14b-v1-fix", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fp4-14b-v1-fix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6741700909143296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6817274121032688 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206948640483384 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4531875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5353224734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json new file mode 100644 index 000000000..cfd4a01d3 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_false/1762652580.142459", + "retrieved_timestamp": "1762652580.1424599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fq2.5-7b-it-normalize_false", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fq2.5-7b-it-normalize_false" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7399156460413925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.551986272150289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46115625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json new file mode 100644 index 000000000..b5e9bfbb8 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_true/1762652580.1426702", + "retrieved_timestamp": "1762652580.142671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/fq2.5-7b-it-normalize_true", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/fq2.5-7b-it-normalize_true" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7399156460413925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.551986272150289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4622356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46115625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json new file mode 100644 index 000000000..98d813390 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-instruct/1762652580.143588", + "retrieved_timestamp": "1762652580.143589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/mllama-3.1-8b-instruct", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/mllama-3.1-8b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3457913890698901 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47176616480333583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776435045317221 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.338 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533244680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json b/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json new file mode 100644 index 000000000..de2e36884 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge-upscaled/1762652580.144358", + "retrieved_timestamp": "1762652580.1443589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/moremerge-upscaled", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/moremerge-upscaled" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1978882697908217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26977370070980244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35930208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10413896276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 8.545 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json b/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json new file mode 100644 index 000000000..e1a75e5d1 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge/1762652580.1440692", + "retrieved_timestamp": "1762652580.14407", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/moremerge", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/moremerge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20190982149585324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28684447696551024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35657291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10654920212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json b/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json new file mode 100644 index 000000000..aec0041ea --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_rmoe-v1/1762652580.1453388", + "retrieved_timestamp": "1762652580.14534", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/rmoe-v1", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/rmoe-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26500795666609045 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29292907133609175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36634374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1124501329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json b/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json new file mode 100644 index 000000000..da62d6188 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_rufalcon3-3b-it/1762652580.14555", + "retrieved_timestamp": "1762652580.14555", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/rufalcon3-3b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/rufalcon3-3b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5942111375594533 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41554222543957625 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38953124999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2347905585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json b/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json new file mode 100644 index 000000000..afa7e4db7 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_testq-32b/1762652580.145958", + "retrieved_timestamp": "1762652580.145958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/testq-32b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/testq-32b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18759668789921852 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2876549792486152 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3714583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11660571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 56.165 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json b/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json new file mode 100644 index 000000000..3550073e2 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe-v2/1762652580.146366", + "retrieved_timestamp": "1762652580.146367", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/tmoe-v2", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/tmoe-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19026959578363187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2896740649804915 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4150833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11003989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json b/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json new file mode 100644 index 000000000..369454037 --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe/1762652580.1461592", + "retrieved_timestamp": "1762652580.1461592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/tmoe", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/tmoe" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11930234001338672 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30728601408520645 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2231543624161074 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36990624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11909906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 11.026 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json b/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json new file mode 100644 index 000000000..7a5e5975b --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_trd-7b-it/1762652580.146566", + "retrieved_timestamp": "1762652580.1465669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/trd-7b-it", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/trd-7b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21847143357402804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990238931062931 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11785239361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json b/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json new file mode 100644 index 000000000..73198648a --- /dev/null +++ b/data/hfopenllm_v2/ehristoforu/ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_ud-14b/1762652580.146786", + "retrieved_timestamp": "1762652580.146786", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/ud-14b", + "developer": "ehristoforu", + "inference_platform": "unknown", + "id": "ehristoforu/ud-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4235273518708139 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3323819044961654 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903323262839879 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43942708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24152260638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json b/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json new file mode 100644 index 000000000..de8bcee6d --- /dev/null +++ b/data/hfopenllm_v2/elinas/Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/elinas_Chronos-Gold-12B-1.0/1762652580.1470149", + "retrieved_timestamp": "1762652580.147016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "elinas/Chronos-Gold-12B-1.0", + "developer": "elinas", + "inference_platform": "unknown", + "id": "elinas/Chronos-Gold-12B-1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3165656014929277 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5514664110708439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47398958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.351811835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json b/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json new file mode 100644 index 000000000..360ef38fa --- /dev/null +++ b/data/hfopenllm_v2/euclaise/ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/euclaise_ReMask-3B/1762652580.14753", + "retrieved_timestamp": "1762652580.147531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "euclaise/ReMask-3B", + "developer": "euclaise", + "inference_platform": "unknown", + "id": "euclaise/ReMask-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2419269759792905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3516779692917367 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13572140957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json b/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json new file mode 100644 index 000000000..f3b619c60 --- /dev/null +++ b/data/hfopenllm_v2/eworojoshua/vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/eworojoshua_vas-01/1762652580.1477718", + "retrieved_timestamp": "1762652580.147773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "eworojoshua/vas-01", + "developer": "eworojoshua", + "inference_platform": "unknown", + "id": "eworojoshua/vas-01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7612479332615238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5417819433732887 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4735649546827795 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44323958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4347573138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json b/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json new file mode 100644 index 000000000..9b70b3138 --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/1762652580.148031", + "retrieved_timestamp": "1762652580.148032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44388555698878973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4273125047156003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36553125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2886469414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json b/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json new file mode 100644 index 000000000..7b4e0244c --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/1762652580.148299", + "retrieved_timestamp": "1762652580.1483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2476473534665798 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3292122979013761 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33821875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16472739361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json b/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json new file mode 100644 index 000000000..3a94014a9 --- /dev/null +++ b/data/hfopenllm_v2/ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/1762652580.148509", + "retrieved_timestamp": "1762652580.14851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", + "developer": "ewre324", + "inference_platform": "unknown", + "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25836336476105626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3071349750892843 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.109375 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json b/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json new file mode 100644 index 000000000..2f4e88c8b --- /dev/null +++ b/data/hfopenllm_v2/ewre324/ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ewre324_ewre324-R1-SmolLM2-135M-Distill/1762652580.148724", + "retrieved_timestamp": "1762652580.148725", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ewre324/ewre324-R1-SmolLM2-135M-Distill", + "developer": "ewre324", + "inference_platform": "unknown", + "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16489026893088118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3041695757290421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3409166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11336436170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json b/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json new file mode 100644 index 000000000..bb9641128 --- /dev/null +++ b/data/hfopenllm_v2/experiment-llm/exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/experiment-llm_exp-3-q-r/1762652580.148931", + "retrieved_timestamp": "1762652580.148932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "experiment-llm/exp-3-q-r", + "developer": "experiment-llm", + "inference_platform": "unknown", + "id": "experiment-llm/exp-3-q-r" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6035785050333116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397159253811645 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27870090634441086 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43154166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43159906914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json b/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json new file mode 100644 index 000000000..3de442eca --- /dev/null +++ b/data/hfopenllm_v2/facebook/opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/facebook_opt-1.3b/1762652580.14919", + "retrieved_timestamp": "1762652580.14919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "facebook/opt-1.3b", + "developer": "facebook", + "inference_platform": "unknown", + "id": "facebook/opt-1.3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23832985367713222 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3093947052760125 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11070478723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 1.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json b/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json new file mode 100644 index 000000000..151319769 --- /dev/null +++ b/data/hfopenllm_v2/facebook/opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/facebook_opt-30b/1762652580.14943", + "retrieved_timestamp": "1762652580.149431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "facebook/opt-30b", + "developer": "facebook", + "inference_platform": "unknown", + "id": "facebook/opt-30b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2452991396162183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30703447525623373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36041666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "OPTForCausalLM", + "params_billions": 30.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json new file mode 100644 index 000000000..9cc143c15 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-MopeyMule/1762652580.1496441", + "retrieved_timestamp": "1762652580.1496441", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/Llama-3-8B-Instruct-MopeyMule", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/Llama-3-8B-Instruct-MopeyMule" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6750444376476638 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383874490132152 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35130208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17644614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json new file mode 100644 index 000000000..43304db70 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-abliterated/1762652580.1499012", + "retrieved_timestamp": "1762652580.149902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/Llama-3-8B-Instruct-abliterated", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/Llama-3-8B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5908888416069362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4353752684977051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41158333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2741855053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json new file mode 100644 index 000000000..31f071b31 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/1762652580.1501682", + "retrieved_timestamp": "1762652580.1501691", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7746867201248244 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.574710022890038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39818749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44522938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json b/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json new file mode 100644 index 000000000..20fce2620 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/1762652580.150389", + "retrieved_timestamp": "1762652580.15039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7244533393617822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4924562150856957 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36218749999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3653590425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json b/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json new file mode 100644 index 000000000..f0b205157 --- /dev/null +++ b/data/hfopenllm_v2/failspy/Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_Phi-3-medium-4k-instruct-abliterated-v3/1762652580.1505978", + "retrieved_timestamp": "1762652580.150599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6319299458769398 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6304799176474429 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4604166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json b/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json new file mode 100644 index 000000000..7d7a8c725 --- /dev/null +++ b/data/hfopenllm_v2/failspy/llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/failspy_llama-3-70B-Instruct-abliterated/1762652580.1508029", + "retrieved_timestamp": "1762652580.150804", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "failspy/llama-3-70B-Instruct-abliterated", + "developer": "failspy", + "inference_platform": "unknown", + "id": "failspy/llama-3-70B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8023389052159382 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6464853840398571 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4127604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5145445478723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json new file mode 100644 index 000000000..e23f480a5 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.1510022", + "retrieved_timestamp": "1762652580.151003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/TheBeagle-v2beta-32B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/TheBeagle-v2beta-32B-MGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.518074265171966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7032634749563558 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4947129909365559 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5915059840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json new file mode 100644 index 000000000..bed0fee09 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.151249", + "retrieved_timestamp": "1762652580.151249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/TheBeagle-v2beta-32B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/TheBeagle-v2beta-32B-MGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503051902285935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.703542441088263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3942598187311178 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5910904255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json b/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json new file mode 100644 index 000000000..2d03947a2 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-SimpleSmaug-34b-v1beta/1762652580.151433", + "retrieved_timestamp": "1762652580.151433", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/UNA-SimpleSmaug-34b-v1beta", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/UNA-SimpleSmaug-34b-v1beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45562551806983254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286654104993475 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4255625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4539561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json b/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json new file mode 100644 index 000000000..dc7f95417 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-TheBeagle-7b-v1/1762652580.151644", + "retrieved_timestamp": "1762652580.151645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/UNA-TheBeagle-7b-v1", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/UNA-TheBeagle-7b-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36887236975669 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5028691097522866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4564375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3019448138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json b/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json new file mode 100644 index 000000000..d9e243c3e --- /dev/null +++ b/data/hfopenllm_v2/fblgit/UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_UNA-ThePitbull-21.4B-v2/1762652580.151847", + "retrieved_timestamp": "1762652580.151847", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/UNA-ThePitbull-21.4B-v2", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/UNA-ThePitbull-21.4B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790387283518841 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.635038821016254 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3515625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json new file mode 100644 index 000000000..bdc018372 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-MGS/1762652580.15205", + "retrieved_timestamp": "1762652580.152051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/cybertron-v4-qw7B-MGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/cybertron-v4-qw7B-MGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6263846593704703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5591772533435835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34894259818731116 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43709375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44730718085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json new file mode 100644 index 000000000..9f135528e --- /dev/null +++ b/data/hfopenllm_v2/fblgit/cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-UNAMGS/1762652580.1522481", + "retrieved_timestamp": "1762652580.152249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/cybertron-v4-qw7B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/cybertron-v4-qw7B-UNAMGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6090240561709597 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5642509108139038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731117824773414 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4500498670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json b/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json new file mode 100644 index 000000000..306940a73 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_juanako-7b-UNA/1762652580.1524491", + "retrieved_timestamp": "1762652580.15245", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/juanako-7b-UNA", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/juanako-7b-UNA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4837276204914073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507001145736535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46449999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json new file mode 100644 index 000000000..039893179 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/1762652580.153163", + "retrieved_timestamp": "1762652580.1531641", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3518364605912313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.423443453814005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42543749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json new file mode 100644 index 000000000..5a98081f1 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS/1762652580.152649", + "retrieved_timestamp": "1762652580.152649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/miniclaus-qw1.5B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/miniclaus-qw1.5B-UNAMGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3348005514257725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4238588294007628 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42934374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2937167553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json b/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json new file mode 100644 index 000000000..6bb5c79b9 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_pancho-v1-qw25-3B-UNAMGS/1762652580.153452", + "retrieved_timestamp": "1762652580.153453", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/pancho-v1-qw25-3B-UNAMGS", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/pancho-v1-qw25-3B-UNAMGS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536134124123991 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49258278193390775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15709969788519637 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.397 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json b/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json new file mode 100644 index 000000000..bbb435f64 --- /dev/null +++ b/data/hfopenllm_v2/fblgit/una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fblgit_una-cybertron-7b-v2-bf16/1762652580.153698", + "retrieved_timestamp": "1762652580.1536992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fblgit/una-cybertron-7b-v2-bf16", + "developer": "fblgit", + "inference_platform": "unknown", + "id": "fblgit/una-cybertron-7b-v2-bf16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47371086494944525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3973388920486269 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4473229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2442652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json b/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json new file mode 100644 index 000000000..81a70a264 --- /dev/null +++ b/data/hfopenllm_v2/fhai50032/RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fhai50032_RolePlayLake-7B/1762652580.153994", + "retrieved_timestamp": "1762652580.153995", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fhai50032/RolePlayLake-7B", + "developer": "fhai50032", + "inference_platform": "unknown", + "id": "fhai50032/RolePlayLake-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5056594280952318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252170095233862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4459270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3159906914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json b/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json new file mode 100644 index 000000000..a2aabfdf8 --- /dev/null +++ b/data/hfopenllm_v2/flammenai/flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_flammen15-gutenberg-DPO-v1-7B/1762652580.155953", + "retrieved_timestamp": "1762652580.155954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/flammen15-gutenberg-DPO-v1-7B", + "developer": "flammenai", + "inference_platform": "unknown", + "id": "flammenai/flammen15-gutenberg-DPO-v1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47980580415519714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5202983979716951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4293125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3185671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json b/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json new file mode 100644 index 000000000..f2e6cac4f --- /dev/null +++ b/data/hfopenllm_v2/fluently-lm/FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fluently-lm_FluentlyLM-Prinum/1762652580.156252", + "retrieved_timestamp": "1762652580.1562529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fluently-lm/FluentlyLM-Prinum", + "developer": "fluently-lm", + "inference_platform": "unknown", + "id": "fluently-lm/FluentlyLM-Prinum" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.809033364805383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7143813967889198 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5400302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44714583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5807845744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json new file mode 100644 index 000000000..64e676ca9 --- /dev/null +++ b/data/hfopenllm_v2/fluently-lm/Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B-Instruct/1762652580.156872", + "retrieved_timestamp": "1762652580.156876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fluently-lm/Llama-TI-8B-Instruct", + "developer": "fluently-lm", + "inference_platform": "unknown", + "id": "fluently-lm/Llama-TI-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7716392505219485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252143041749421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38134375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37258976063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json b/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json new file mode 100644 index 000000000..24d16ee91 --- /dev/null +++ b/data/hfopenllm_v2/fluently-sets/FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fluently-sets_FalconThink3-10B-IT/1762652580.1573172", + "retrieved_timestamp": "1762652580.1573179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fluently-sets/FalconThink3-10B-IT", + "developer": "fluently-sets", + "inference_platform": "unknown", + "id": "fluently-sets/FalconThink3-10B-IT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7326216660682544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.620016981648187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44788541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4434840425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json b/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json new file mode 100644 index 000000000..7b495e56a --- /dev/null +++ b/data/hfopenllm_v2/fluently-sets/reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fluently-sets_reasoning-1-1k-demo/1762652580.157624", + "retrieved_timestamp": "1762652580.1576252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fluently-sets/reasoning-1-1k-demo", + "developer": "fluently-sets", + "inference_platform": "unknown", + "id": "fluently-sets/reasoning-1-1k-demo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7524800861713586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6396692351083745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4060625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773936170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json b/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json new file mode 100644 index 000000000..cb70c885d --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/1762652580.1578538", + "retrieved_timestamp": "1762652580.157855", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16139288199754429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29763925404210967 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4219375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11735372340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json new file mode 100644 index 000000000..9be7f11d8 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-7b-2-25-2025/1762652580.158112", + "retrieved_timestamp": "1762652580.158113", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-elite-v1.1-7b-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-elite-v1.1-7b-2-25-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1249728498162653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28673660666639783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3487291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10979055851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json b/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json new file mode 100644 index 000000000..faf317995 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/1762652580.158336", + "retrieved_timestamp": "1762652580.158336", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14108454456397912 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292375183445424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35409375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11012300531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json b/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json new file mode 100644 index 000000000..aa0ee9085 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.2-7b-2-26-2025/1762652580.158752", + "retrieved_timestamp": "1762652580.158756", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-elite-v1.2-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-elite-v1.2-7b-2-26-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14800396281865452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29300480737441686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1186003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json b/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json new file mode 100644 index 000000000..c1e925df3 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-7b-2-26-2025/1762652580.159164", + "retrieved_timestamp": "1762652580.159165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-gen3-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-gen3-7b-2-26-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1964144026737944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2915705776174771 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3912083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json b/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json new file mode 100644 index 000000000..5b4f47460 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-v1.2-7b-2-26-2025/1762652580.15945", + "retrieved_timestamp": "1762652580.1594508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-gen3-v1.2-7b-2-26-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-gen3-v1.2-7b-2-26-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2043577707150361 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30577476935056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38999999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json b/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json new file mode 100644 index 000000000..23da59f95 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-math-v2.3-2-25-2025/1762652580.159737", + "retrieved_timestamp": "1762652580.159738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-math-v2.3-2-25-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-math-v2.3-2-25-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13733781920858879 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2949403673764691 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json b/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json new file mode 100644 index 000000000..432a63d06 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1-7b/1762652580.160087", + "retrieved_timestamp": "1762652580.160088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-v1-7b", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-v1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19723888172271792 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3003216459152819 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.002265861027190332 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41520833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json b/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json new file mode 100644 index 000000000..12145041c --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1.1-7b-2-24-2025/1762652580.1604211", + "retrieved_timestamp": "1762652580.1604218", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-v1.1-7b-2-24-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-v1.1-7b-2-24-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34122018466557624 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5442430910797442 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45569791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4523769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json b/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json new file mode 100644 index 000000000..0d4d97207 --- /dev/null +++ b/data/hfopenllm_v2/formulae/mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/formulae_mita-v1.2-7b-2-24-2025/1762652580.160727", + "retrieved_timestamp": "1762652580.160728", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "formulae/mita-v1.2-7b-2-24-2025", + "developer": "formulae", + "inference_platform": "unknown", + "id": "formulae/mita-v1.2-7b-2-24-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.256415200556745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4919464940215105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4879154078549849 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33585438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json b/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json new file mode 100644 index 000000000..7306c5d62 --- /dev/null +++ b/data/hfopenllm_v2/frameai/Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/frameai_Loxa-4B/1762652580.160984", + "retrieved_timestamp": "1762652580.160984", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "frameai/Loxa-4B", + "developer": "frameai", + "inference_platform": "unknown", + "id": "frameai/Loxa-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47648350820268 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42171373309002896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33765625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28016954787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.018 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json new file mode 100644 index 000000000..bc627056b --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.1/1762652580.16175", + "retrieved_timestamp": "1762652580.161752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "freewheelin/free-solar-evo-v0.1", + "developer": "freewheelin", + "inference_platform": "unknown", + "id": "freewheelin/free-solar-evo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20500715878313985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4502211109638701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4945833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414228723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json new file mode 100644 index 000000000..98c0bb375 --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.11/1762652580.1621969", + "retrieved_timestamp": "1762652580.162198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "freewheelin/free-solar-evo-v0.11", + "developer": "freewheelin", + "inference_platform": "unknown", + "id": "freewheelin/free-solar-evo-v0.11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20265894493277836 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4545155032474882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052187499999999 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34674202127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json new file mode 100644 index 000000000..f85188ccc --- /dev/null +++ b/data/hfopenllm_v2/freewheelin/free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.13/1762652580.1624699", + "retrieved_timestamp": "1762652580.1624708", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "freewheelin/free-solar-evo-v0.13", + "developer": "freewheelin", + "inference_platform": "unknown", + "id": "freewheelin/free-solar-evo-v0.13" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2320598234905606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4554839670962904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50515625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34699135638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json b/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json new file mode 100644 index 000000000..954c27d98 --- /dev/null +++ b/data/hfopenllm_v2/gabrielmbmb/SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/1762652580.162997", + "retrieved_timestamp": "1762652580.162998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", + "developer": "gabrielmbmb", + "inference_platform": "unknown", + "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23058595637353335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313843378282092 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33276041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11560837765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json new file mode 100644 index 000000000..676f5a94b --- /dev/null +++ b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/1762652580.163272", + "retrieved_timestamp": "1762652580.1632729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", + "developer": "gaverfraxz", + "inference_platform": "unknown", + "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40094615619888563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3984844272016949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36504166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16539228723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json new file mode 100644 index 000000000..78e4f39ff --- /dev/null +++ b/data/hfopenllm_v2/gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/1762652580.163549", + "retrieved_timestamp": "1762652580.16355", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", + "developer": "gaverfraxz", + "inference_platform": "unknown", + "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45505148561372716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5043660783243713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36785239361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json b/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json new file mode 100644 index 000000000..9e69add89 --- /dev/null +++ b/data/hfopenllm_v2/ghost-x/ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ghost-x_ghost-8b-beta-1608/1762652580.16434", + "retrieved_timestamp": "1762652580.164341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ghost-x/ghost-8b-beta-1608", + "developer": "ghost-x", + "inference_platform": "unknown", + "id": "ghost-x/ghost-8b-beta-1608" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42727407722620425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45165496100352914 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35158333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2839926861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json new file mode 100644 index 000000000..e65ff7a56 --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-MediChatIndo-8B-v1/1762652580.165248", + "retrieved_timestamp": "1762652580.165249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", + "developer": "gmonsoon", + "inference_platform": "unknown", + "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41628323958208663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508834027881236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3107546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json b/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json new file mode 100644 index 000000000..09df6239d --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Rebase-8B-Test/1762652580.165493", + "retrieved_timestamp": "1762652580.165493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gmonsoon/SahabatAI-Rebase-8B-Test", + "developer": "gmonsoon", + "inference_platform": "unknown", + "id": "gmonsoon/SahabatAI-Rebase-8B-Test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5156263159527831 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.522960549734047 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41328125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3663563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json b/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json new file mode 100644 index 000000000..5bc2507c2 --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gmonsoon_StockSeaLLMs-7B-v1/1762652580.165695", + "retrieved_timestamp": "1762652580.165696", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gmonsoon/StockSeaLLMs-7B-v1", + "developer": "gmonsoon", + "inference_platform": "unknown", + "id": "gmonsoon/StockSeaLLMs-7B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599218961245052 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5271087932535433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39519614361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json b/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json new file mode 100644 index 000000000..e80d81d20 --- /dev/null +++ b/data/hfopenllm_v2/gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/1762652580.165903", + "retrieved_timestamp": "1762652580.1659038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", + "developer": "gmonsoon", + "inference_platform": "unknown", + "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7377923908562614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6077244532441547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47780208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43467420212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json new file mode 100644 index 000000000..0f3614ebc --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_2/1762652580.166118", + "retrieved_timestamp": "1762652580.166118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_full_2", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_full_2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31781450994472443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216953430035033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40515625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.285405585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json new file mode 100644 index 000000000..4d89ce685 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_3B/1762652580.166356", + "retrieved_timestamp": "1762652580.166357", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_full_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_full_3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36957162550920447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46841893776834337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.335688164893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json new file mode 100644 index 000000000..82eaff22a --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600/1762652580.16661", + "retrieved_timestamp": "1762652580.166613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ifd_max_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ifd_max_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3042504997850149 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40285133876405865 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3508645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29163896276595747 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json new file mode 100644 index 000000000..67d4f12fe --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600_3B/1762652580.1669528", + "retrieved_timestamp": "1762652580.166954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ifd_max_2600_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ifd_max_2600_3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.298155560579263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4626377955326701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1593655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43455208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32878989361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json new file mode 100644 index 000000000..ef312cdf8 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_me_max_5200/1762652580.167201", + "retrieved_timestamp": "1762652580.167202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ifd_me_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ifd_me_max_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36832271705740766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4153453015610935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3482604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29820478723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json new file mode 100644 index 000000000..112fd0348 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_min_2600/1762652580.167441", + "retrieved_timestamp": "1762652580.167443", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ifd_min_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ifd_min_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3749673089624419 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4219047173013076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36562500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.289311835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json new file mode 100644 index 000000000..fcc43fe13 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_ans_max_5200/1762652580.167691", + "retrieved_timestamp": "1762652580.1676931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ins_ans_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ins_ans_max_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34786477657061043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40982060224148426 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3601666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2900598404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json new file mode 100644 index 000000000..5f09f888b --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_max_5200/1762652580.1679769", + "retrieved_timestamp": "1762652580.167978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ins_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ins_max_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32750657145263457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41550742328078477 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.361375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2915558510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json new file mode 100644 index 000000000..acaab9803 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_2600/1762652580.1682088", + "retrieved_timestamp": "1762652580.16821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ins_min_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ins_min_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33300199027469335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41873469888886056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38534375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28798204787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json new file mode 100644 index 000000000..0be2526eb --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_5200/1762652580.1684108", + "retrieved_timestamp": "1762652580.1684108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_ins_min_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_ins_min_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3359995921931586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4289279419241076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39055208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29488031914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json new file mode 100644 index 000000000..c106b8621 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_5200/1762652580.1686149", + "retrieved_timestamp": "1762652580.1686149", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_sampled_ifd_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_sampled_ifd_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2923853154075631 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4032969715626326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3520729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2896442819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json new file mode 100644 index 000000000..f0dec5747 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_new_5200/1762652580.1688168", + "retrieved_timestamp": "1762652580.168818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_sampled_ifd_new_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36632468516868577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4177831234050982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29247007978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json new file mode 100644 index 000000000..d4344835f --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.1_2600/1762652580.1691651", + "retrieved_timestamp": "1762652580.169167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_0.1_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_0.1_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3287554799044313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42522607952607777 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37064583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29230385638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json new file mode 100644 index 000000000..d1f4fe128 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.3_2600/1762652580.1694138", + "retrieved_timestamp": "1762652580.169415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_0.3_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_0.3_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33752332699459653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4151448369012765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37594791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29130651595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json new file mode 100644 index 000000000..ab82067f5 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.7_2600/1762652580.169624", + "retrieved_timestamp": "1762652580.169625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_0.7_2600", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_0.7_2600" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3639764713183243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41845266250678703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3468645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2982878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json new file mode 100644 index 000000000..40267fce6 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2500/1762652580.1698968", + "retrieved_timestamp": "1762652580.169898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_2500", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_2500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3563577973111345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41801375075895447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36270833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2939660904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json new file mode 100644 index 000000000..3a2bc19ad --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2600_3B/1762652580.170121", + "retrieved_timestamp": "1762652580.170122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_2600_3B", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_2600_3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33577463352792813 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4716306839273412 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44744791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341921542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json new file mode 100644 index 000000000..b4e353fd2 --- /dev/null +++ b/data/hfopenllm_v2/godlikehhd/alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_5200/1762652580.170327", + "retrieved_timestamp": "1762652580.170327", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "godlikehhd/alpaca_data_score_max_5200", + "developer": "godlikehhd", + "inference_platform": "unknown", + "id": "godlikehhd/alpaca_data_score_max_5200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34454248061809334 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42417102847687554 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3877916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446476063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json new file mode 100644 index 000000000..3d5393da2 --- /dev/null +++ b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K-100steps/1762652579.472713", + "retrieved_timestamp": "1762652579.472714", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AALF/gemma-2-27b-it-SimPO-37K-100steps", + "developer": "google", + "inference_platform": "unknown", + "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2567642743476199 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39308230769885016 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3329166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21251662234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json new file mode 100644 index 000000000..6b0409417 --- /dev/null +++ b/data/hfopenllm_v2/google/AALF/gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K/1762652579.472391", + "retrieved_timestamp": "1762652579.4723918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AALF/gemma-2-27b-it-SimPO-37K", + "developer": "google", + "inference_platform": "unknown", + "id": "AALF/gemma-2-27b-it-SimPO-37K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24065257959990605 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3911343917952534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3487604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json b/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json new file mode 100644 index 000000000..64916bbd4 --- /dev/null +++ b/data/hfopenllm_v2/google/AELLM/gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-aeria-infinity-9b/1762652579.4729412", + "retrieved_timestamp": "1762652579.472942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AELLM/gemma-2-aeria-infinity-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "AELLM/gemma-2-aeria-infinity-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.759399504426034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5983336669577649 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38622007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json b/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json new file mode 100644 index 000000000..6a00de2f8 --- /dev/null +++ b/data/hfopenllm_v2/google/AELLM/gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-lyco-infinity-9b/1762652579.473207", + "retrieved_timestamp": "1762652579.473208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AELLM/gemma-2-lyco-infinity-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "AELLM/gemma-2-lyco-infinity-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7316475839660989 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5839534871023703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40063541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.378656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json b/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json new file mode 100644 index 000000000..ee316bac2 --- /dev/null +++ b/data/hfopenllm_v2/google/Aashraf995/Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Aashraf995_Gemma-Evo-10B/1762652579.476305", + "retrieved_timestamp": "1762652579.476305", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Aashraf995/Gemma-Evo-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "Aashraf995/Gemma-Evo-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7332211864519476 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6044352897552882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45947916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4275265957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json b/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json new file mode 100644 index 000000000..4e6900f5f --- /dev/null +++ b/data/hfopenllm_v2/google/BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/1762652579.487571", + "retrieved_timestamp": "1762652579.487571", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", + "developer": "google", + "inference_platform": "unknown", + "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31763831079314 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5979459664230056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39657291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868849734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json b/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json new file mode 100644 index 000000000..155351adf --- /dev/null +++ b/data/hfopenllm_v2/google/BlackBeenie/Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Gemma-2-9b/1762652579.4958751", + "retrieved_timestamp": "1762652579.495876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Neos-Gemma-2-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "BlackBeenie/Neos-Gemma-2-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5875665456544192 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5502975126048852 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39810505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json b/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json new file mode 100644 index 000000000..7caf7c97c --- /dev/null +++ b/data/hfopenllm_v2/google/Columbia-NLP/LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/1762652579.507273", + "retrieved_timestamp": "1762652579.507273", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", + "developer": "google", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30664858131978706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3895836210706875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42791666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1692154255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json new file mode 100644 index 000000000..6327cb5ef --- /dev/null +++ b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-9B/1762652579.539702", + "retrieved_timestamp": "1762652579.5397062", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Gemma-The-Writer-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "DavidAU/Gemma-The-Writer-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17403156956874427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5905439384199537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.409875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39793882978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json new file mode 100644 index 000000000..483a78277 --- /dev/null +++ b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-DEADLINE-10B/1762652579.5400288", + "retrieved_timestamp": "1762652579.54003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Gemma-The-Writer-DEADLINE-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23315802071836061 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5896087932535433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4188645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39461436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.952 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json new file mode 100644 index 000000000..339312494 --- /dev/null +++ b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/1762652579.5402539", + "retrieved_timestamp": "1762652579.540255", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28578948301617485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5909421265868766 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41759375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946974734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.034 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json new file mode 100644 index 000000000..8d03495e9 --- /dev/null +++ b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/1762652579.540473", + "retrieved_timestamp": "1762652579.5404742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527549125209998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5911959785635329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815436241610737 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4111770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39677526595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json new file mode 100644 index 000000000..8e81e365a --- /dev/null +++ b/data/hfopenllm_v2/google/DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/1762652579.540709", + "retrieved_timestamp": "1762652579.54071", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", + "developer": "google", + "inference_platform": "unknown", + "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7070927361622716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5922294775018883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41632291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3966090425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.034 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json new file mode 100644 index 000000000..0a91c9bda --- /dev/null +++ b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it-Philos/1762652579.598697", + "retrieved_timestamp": "1762652579.598698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Athena-gemma-2-2b-it-Philos", + "developer": "google", + "inference_platform": "unknown", + "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620950189940469 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37947768790586744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43136458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22481715425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json new file mode 100644 index 000000000..d97646627 --- /dev/null +++ b/data/hfopenllm_v2/google/EpistemeAI/Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it/1762652579.598221", + "retrieved_timestamp": "1762652579.598221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Athena-gemma-2-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "EpistemeAI/Athena-gemma-2-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3134172883504657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42642293591146 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43505208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2421875 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json b/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json new file mode 100644 index 000000000..a4bc58154 --- /dev/null +++ b/data/hfopenllm_v2/google/EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/1762652579.598942", + "retrieved_timestamp": "1762652579.598943", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", + "developer": "google", + "inference_platform": "unknown", + "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40299405577201824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4331916189482215 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25872672872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json b/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json new file mode 100644 index 000000000..a9d11bdcc --- /dev/null +++ b/data/hfopenllm_v2/google/EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/1762652579.609552", + "retrieved_timestamp": "1762652579.6095529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", + "developer": "google", + "inference_platform": "unknown", + "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4351177098986245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41754154460978427 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41696875000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22972074468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json new file mode 100644 index 000000000..0cb75fba0 --- /dev/null +++ b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/1762652579.627253", + "retrieved_timestamp": "1762652579.627253", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", + "developer": "google", + "inference_platform": "unknown", + "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30637375497014585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3887493166323577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4550208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23844747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json new file mode 100644 index 000000000..d687f2a3e --- /dev/null +++ b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-Merged/1762652579.627504", + "retrieved_timestamp": "1762652579.6275048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", + "developer": "google", + "inference_platform": "unknown", + "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30637375497014585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4130633897394575 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22282247340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json new file mode 100644 index 000000000..4b4b3b1af --- /dev/null +++ b/data/hfopenllm_v2/google/GenVRadmin/AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaUltra-Merged/1762652579.627715", + "retrieved_timestamp": "1762652579.627716", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", + "developer": "google", + "inference_platform": "unknown", + "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30207737691547315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4141445378464817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25335570469798663 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42785416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2265625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json new file mode 100644 index 000000000..75c9626ba --- /dev/null +++ b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635783", + "retrieved_timestamp": "1762652579.635786", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gunulhona/Gemma-Ko-Merge-PEFT", + "developer": "google", + "inference_platform": "unknown", + "id": "Gunulhona/Gemma-Ko-Merge-PEFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4441348954108433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4862989687822461 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3985833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3097573138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 20.318 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json new file mode 100644 index 000000000..d11cecad0 --- /dev/null +++ b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635457", + "retrieved_timestamp": "1762652579.635457", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gunulhona/Gemma-Ko-Merge-PEFT", + "developer": "google", + "inference_platform": "unknown", + "id": "Gunulhona/Gemma-Ko-Merge-PEFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28803906966847964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5154093999781059 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40801041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38173204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 20.318 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json new file mode 100644 index 000000000..f2c993c0e --- /dev/null +++ b/data/hfopenllm_v2/google/Gunulhona/Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge/1762652579.635146", + "retrieved_timestamp": "1762652579.635147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gunulhona/Gemma-Ko-Merge", + "developer": "google", + "inference_platform": "unknown", + "id": "Gunulhona/Gemma-Ko-Merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6415721397004392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5813027258981727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18806646525679757 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40469791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3878823138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json b/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json new file mode 100644 index 000000000..a7f0ce45a --- /dev/null +++ b/data/hfopenllm_v2/google/HuggingFaceH4/zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-gemma-v0.1/1762652579.641236", + "retrieved_timestamp": "1762652579.641237", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3363741539116212 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4623735014679749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37396874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2847406914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json b/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json new file mode 100644 index 000000000..788ac4539 --- /dev/null +++ b/data/hfopenllm_v2/google/INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/1762652579.645844", + "retrieved_timestamp": "1762652579.645845", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", + "developer": "google", + "inference_platform": "unknown", + "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json new file mode 100644 index 000000000..c0fe2e3ab --- /dev/null +++ b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-2b-it-abliterated/1762652579.646105", + "retrieved_timestamp": "1762652579.646106", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IlyaGusev/gemma-2-2b-it-abliterated", + "developer": "google", + "inference_platform": "unknown", + "id": "IlyaGusev/gemma-2-2b-it-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.533086654521115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4118601326211988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37818749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25382313829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json new file mode 100644 index 000000000..1f9246265 --- /dev/null +++ b/data/hfopenllm_v2/google/IlyaGusev/gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-9b-it-abliterated/1762652579.646349", + "retrieved_timestamp": "1762652579.6463501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IlyaGusev/gemma-2-9b-it-abliterated", + "developer": "google", + "inference_platform": "unknown", + "id": "IlyaGusev/gemma-2-9b-it-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.747259493698941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.59063299776093 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39153922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json b/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json new file mode 100644 index 000000000..55638238d --- /dev/null +++ b/data/hfopenllm_v2/google/LenguajeNaturalAI/leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-gemma-2b-v0/1762652579.7101068", + "retrieved_timestamp": "1762652579.7101078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LenguajeNaturalAI/leniachat-gemma-2b-v0", + "developer": "google", + "inference_platform": "unknown", + "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21497404664069114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30740211895412034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36590625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11702127659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json b/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json new file mode 100644 index 000000000..2450df08c --- /dev/null +++ b/data/hfopenllm_v2/google/ModelSpace/GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ModelSpace_GemmaX2-28-9B-v0.1/1762652579.76179", + "retrieved_timestamp": "1762652579.761791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ModelSpace/GemmaX2-28-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "ModelSpace/GemmaX2-28-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.003921816336210145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3687226427280163 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35365625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2230718085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json new file mode 100644 index 000000000..33ab18759 --- /dev/null +++ b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v-0.1.0/1762652579.7653928", + "retrieved_timestamp": "1762652579.765394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", + "developer": "google", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json new file mode 100644 index 000000000..33ae17d7e --- /dev/null +++ b/data/hfopenllm_v2/google/NAPS-ai/naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v0.1.0/1762652579.765648", + "retrieved_timestamp": "1762652579.7656488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-gemma-2-27b-v0.1.0", + "developer": "google", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json b/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json new file mode 100644 index 000000000..548808c80 --- /dev/null +++ b/data/hfopenllm_v2/google/SaisExperiments/Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Stheno-Filtered/1762652579.855671", + "retrieved_timestamp": "1762652579.8556721", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", + "developer": "google", + "inference_platform": "unknown", + "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4196554032190144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149234152222183 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40029166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2629654255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json b/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json new file mode 100644 index 000000000..6e0fa4ae5 --- /dev/null +++ b/data/hfopenllm_v2/google/Skywork/Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/1762652579.8884969", + "retrieved_timestamp": "1762652579.8884978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", + "developer": "google", + "inference_platform": "unknown", + "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7807317916461656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.635960062329604 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42314583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103224734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForSequenceClassification", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json b/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json new file mode 100644 index 000000000..0acd66faa --- /dev/null +++ b/data/hfopenllm_v2/google/Sorawiz/Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-9B-Base/1762652579.8897338", + "retrieved_timestamp": "1762652579.889735", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sorawiz/Gemma-9B-Base", + "developer": "google", + "inference_platform": "unknown", + "id": "Sorawiz/Gemma-9B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16673758959560633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.593040577894583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40451041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42353723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json b/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json new file mode 100644 index 000000000..27be9020e --- /dev/null +++ b/data/hfopenllm_v2/google/Sorawiz/Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-Creative-9B-Base/1762652579.890075", + "retrieved_timestamp": "1762652579.890076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sorawiz/Gemma-Creative-9B-Base", + "developer": "google", + "inference_platform": "unknown", + "id": "Sorawiz/Gemma-Creative-9B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1515002415812267 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5458614335095562 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4007646276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json b/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json new file mode 100644 index 000000000..17ef9004a --- /dev/null +++ b/data/hfopenllm_v2/google/Supichi/BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Supichi_BBAI_135_Gemma/1762652579.8946822", + "retrieved_timestamp": "1762652579.894683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Supichi/BBAI_135_Gemma", + "developer": "google", + "inference_platform": "unknown", + "id": "Supichi/BBAI_135_Gemma" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06562144000141845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35684129093449685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38047916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16722074468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 19.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json new file mode 100644 index 000000000..1340a4252 --- /dev/null +++ b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-9B-v1/1762652579.9140742", + "retrieved_timestamp": "1762652579.914075", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Gemmasutra-9B-v1", + "developer": "google", + "inference_platform": "unknown", + "id": "TheDrummer/Gemmasutra-9B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24155130609006326 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5886914248369671 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48459375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4045046542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json new file mode 100644 index 000000000..ae84f3630 --- /dev/null +++ b/data/hfopenllm_v2/google/TheDrummer/Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-Mini-2B-v1/1762652579.914318", + "retrieved_timestamp": "1762652579.914319", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Gemmasutra-Mini-2B-v1", + "developer": "google", + "inference_platform": "unknown", + "id": "TheDrummer/Gemmasutra-Mini-2B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25486597782771936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35750190791471836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3489791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20545212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json new file mode 100644 index 000000000..0bcb644f9 --- /dev/null +++ b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v1/1762652579.915312", + "retrieved_timestamp": "1762652579.915313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Tiger-Gemma-9B-v1", + "developer": "google", + "inference_platform": "unknown", + "id": "TheDrummer/Tiger-Gemma-9B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.728150197032762 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5703687739329574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41616666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41181848404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json new file mode 100644 index 000000000..f376d5dd7 --- /dev/null +++ b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v2/1762652579.915529", + "retrieved_timestamp": "1762652579.91553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Tiger-Gemma-9B-v2", + "developer": "google", + "inference_platform": "unknown", + "id": "TheDrummer/Tiger-Gemma-9B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6985997154217476 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5617191114121779 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40841666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41123670212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json new file mode 100644 index 000000000..5822da393 --- /dev/null +++ b/data/hfopenllm_v2/google/TheDrummer/Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v3/1762652579.915734", + "retrieved_timestamp": "1762652579.915734", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Tiger-Gemma-9B-v3", + "developer": "google", + "inference_platform": "unknown", + "id": "TheDrummer/Tiger-Gemma-9B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6820635912711606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5812231557853248 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1623867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40591755319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json b/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json new file mode 100644 index 000000000..cb7771fb2 --- /dev/null +++ b/data/hfopenllm_v2/google/Triangle104/Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Gemmadevi-Stock-10B/1762652579.9249291", + "retrieved_timestamp": "1762652579.9249291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Gemmadevi-Stock-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "Triangle104/Gemmadevi-Stock-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15819470117067158 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6065922684184144 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46211458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4261968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json new file mode 100644 index 000000000..e1fd6426e --- /dev/null +++ b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/1762652579.936019", + "retrieved_timestamp": "1762652579.93602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", + "developer": "google", + "inference_platform": "unknown", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.308221075634871 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5968934762705508 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4099375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39070811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json new file mode 100644 index 000000000..9444e2da9 --- /dev/null +++ b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/1762652579.936279", + "retrieved_timestamp": "1762652579.93628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", + "developer": "google", + "inference_platform": "unknown", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100196367859502 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5989880877421281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4139375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.386968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json new file mode 100644 index 000000000..658860b01 --- /dev/null +++ b/data/hfopenllm_v2/google/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/1762652579.9364889", + "retrieved_timestamp": "1762652579.93649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", + "developer": "google", + "inference_platform": "unknown", + "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31671409637539505 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6007080229268026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41660416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.382563164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json new file mode 100644 index 000000000..328d48174 --- /dev/null +++ b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-2b/1762652579.941349", + "retrieved_timestamp": "1762652579.94135", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-Gemma-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-Gemma-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24752213017017072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3416315376053174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3675833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14685837765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json new file mode 100644 index 000000000..f8d978b0a --- /dev/null +++ b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-7b/1762652579.9415941", + "retrieved_timestamp": "1762652579.9415948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-Gemma-7b", + "developer": "google", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-Gemma-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406705319662939 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41879127895858687 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35942708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961269946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json new file mode 100644 index 000000000..be8967233 --- /dev/null +++ b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/1762652579.9427688", + "retrieved_timestamp": "1762652579.94277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13206625088099574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42408371860644856 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3994583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.269281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json new file mode 100644 index 000000000..b182fc0ea --- /dev/null +++ b/data/hfopenllm_v2/google/VAGOsolutions/SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/1762652579.94298", + "retrieved_timestamp": "1762652579.942981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3024009627787604 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6072645787154746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43182291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40907579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json b/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json new file mode 100644 index 000000000..3e84cf788 --- /dev/null +++ b/data/hfopenllm_v2/google/Youlln/4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/1762652579.961175", + "retrieved_timestamp": "1762652579.9611762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", + "developer": "google", + "inference_platform": "unknown", + "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2713766140507188 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5922529923998928 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46719791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42096077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json b/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json new file mode 100644 index 000000000..5af0f8c5a --- /dev/null +++ b/data/hfopenllm_v2/google/ZHLiu627/zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-rpo-avg/1762652579.9660559", + "retrieved_timestamp": "1762652579.966057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZHLiu627/zephyr-7b-gemma-rpo-avg", + "developer": "google", + "inference_platform": "unknown", + "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30060350979844586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41832761356743015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40810416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2830784574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json b/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json new file mode 100644 index 000000000..4b1006c54 --- /dev/null +++ b/data/hfopenllm_v2/google/agentlans/Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Gemma2-9B-AdvancedFuse/1762652579.975734", + "retrieved_timestamp": "1762652579.975735", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Gemma2-9B-AdvancedFuse", + "developer": "google", + "inference_platform": "unknown", + "id": "agentlans/Gemma2-9B-AdvancedFuse" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15427288483446144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.585936684475517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000166223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json new file mode 100644 index 000000000..6419fe7b7 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-2.6B/1762652579.985875", + "retrieved_timestamp": "1762652579.985876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp1-2.6B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp1-2.6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5354348683714766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343094462630086 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45616666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26886635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json new file mode 100644 index 000000000..2418f5dea --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-27B/1762652579.986121", + "retrieved_timestamp": "1762652579.986122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp1-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp1-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7186332265056716 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6398902146527521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2583081570996979 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47671875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44564494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json new file mode 100644 index 000000000..b3c302fb0 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-2.6B/1762652579.98633", + "retrieved_timestamp": "1762652579.98633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp2-2.6B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp2-2.6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5747272791748117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4307646783089521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44677083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26961436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json new file mode 100644 index 000000000..1ba58c357 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-27B/1762652579.986531", + "retrieved_timestamp": "1762652579.9865322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp2-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp2-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7545534736720789 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6557274121032689 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27870090634441086 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46208333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46226728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json new file mode 100644 index 000000000..b71b1c599 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp3-27B/1762652579.986752", + "retrieved_timestamp": "1762652579.986753", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp3-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp3-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7426384216102164 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6499638721230724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27416918429003023 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47402083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4640957446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json new file mode 100644 index 000000000..0489aac8e --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp4-27B/1762652579.986965", + "retrieved_timestamp": "1762652579.9869661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Gemma2Slerp4-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/Gemma2Slerp4-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7496575752337131 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6529581339749019 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2719033232628399 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4502395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46492686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json new file mode 100644 index 000000000..3870f37d2 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp-9B/1762652579.987181", + "retrieved_timestamp": "1762652579.9871821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/GemmaSlerp-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/GemmaSlerp-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.704320092909037 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.592057786577488 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21601208459214502 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46732291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41605718085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json new file mode 100644 index 000000000..b1fecde55 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp2-9B/1762652579.987394", + "retrieved_timestamp": "1762652579.987395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/GemmaSlerp2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/GemmaSlerp2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7281003293483512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.598271299766216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47671875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42386968085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json new file mode 100644 index 000000000..17b69a9ea --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp4-10B/1762652579.9875991", + "retrieved_timestamp": "1762652579.9875998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/GemmaSlerp4-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/GemmaSlerp4-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7326216660682544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6027862253440982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45398958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4250332446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json new file mode 100644 index 000000000..4ab0552c7 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp5-10B/1762652579.9878101", + "retrieved_timestamp": "1762652579.987811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/GemmaSlerp5-10B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/GemmaSlerp5-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7353444416370785 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.605447654436423 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46078125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4328457446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json b/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json new file mode 100644 index 000000000..f4e64aa81 --- /dev/null +++ b/data/hfopenllm_v2/google/allknowingroger/GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaStock1-27B/1762652579.9880252", + "retrieved_timestamp": "1762652579.9880252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/GemmaStock1-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "allknowingroger/GemmaStock1-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6565607454366021 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.263595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45268749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47298869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json b/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json new file mode 100644 index 000000000..57ed13a54 --- /dev/null +++ b/data/hfopenllm_v2/google/anakin87/gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anakin87_gemma-2b-orpo/1762652580.010973", + "retrieved_timestamp": "1762652580.010974", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anakin87/gemma-2b-orpo", + "developer": "google", + "inference_platform": "unknown", + "id": "anakin87/gemma-2b-orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24779695651981187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34261709435617754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1305684840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json b/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json new file mode 100644 index 000000000..13953df14 --- /dev/null +++ b/data/hfopenllm_v2/google/anthracite-org/magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-customgemma2/1762652580.012768", + "retrieved_timestamp": "1762652580.012769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "anthracite-org/magnum-v3-9b-customgemma2", + "developer": "google", + "inference_platform": "unknown", + "id": "anthracite-org/magnum-v3-9b-customgemma2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1272955757390391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5340136936916174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45646875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4204621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json b/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json new file mode 100644 index 000000000..c9f2ce1b6 --- /dev/null +++ b/data/hfopenllm_v2/google/beomi/gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/beomi_gemma-mling-7b/1762652580.030431", + "retrieved_timestamp": "1762652580.030431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "beomi/gemma-mling-7b", + "developer": "google", + "inference_platform": "unknown", + "id": "beomi/gemma-mling-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20290939152559653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40675941947154004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37585416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2632978723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json b/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json new file mode 100644 index 000000000..4fecca528 --- /dev/null +++ b/data/hfopenllm_v2/google/bunnycore/Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Gemma-2-2B-Smart/1762652580.044707", + "retrieved_timestamp": "1762652580.044708", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Gemma-2-2B-Smart", + "developer": "google", + "inference_platform": "unknown", + "id": "bunnycore/Gemma-2-2B-Smart" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13206625088099574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39742674570492836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4248541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2426030585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json b/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json new file mode 100644 index 000000000..129ecba1a --- /dev/null +++ b/data/hfopenllm_v2/google/bunnycore/Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Gemma2-9B-TitanFusion/1762652580.044988", + "retrieved_timestamp": "1762652580.0449889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Gemma2-9B-TitanFusion", + "developer": "google", + "inference_platform": "unknown", + "id": "bunnycore/Gemma2-9B-TitanFusion" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16184169115724056 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5712026020785131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39602726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json new file mode 100644 index 000000000..3bbd8aa86 --- /dev/null +++ b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/1762652580.099224", + "retrieved_timestamp": "1762652580.099225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", + "developer": "google", + "inference_platform": "unknown", + "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2941827683878775 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5939369622672414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39257291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37998670212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json new file mode 100644 index 000000000..2e214dea8 --- /dev/null +++ b/data/hfopenllm_v2/google/cat-searcher/gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1/1762652580.091131", + "retrieved_timestamp": "1762652580.091137", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1", + "developer": "google", + "inference_platform": "unknown", + "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30147674836101546 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5971867698707507 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39266666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38538896276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json b/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json new file mode 100644 index 000000000..cfe44da24 --- /dev/null +++ b/data/hfopenllm_v2/google/cognitivecomputations/dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-gemma2-2b/1762652580.115823", + "retrieved_timestamp": "1762652580.115823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08955127949396491 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40813187411055213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2105219414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json b/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json new file mode 100644 index 000000000..aba38c445 --- /dev/null +++ b/data/hfopenllm_v2/google/djuna/Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/djuna_Gemma-2-gemmama-9b/1762652580.12782", + "retrieved_timestamp": "1762652580.127821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "djuna/Gemma-2-gemmama-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "djuna/Gemma-2-gemmama-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7703404743857409 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420037856495951 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3109208776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json new file mode 100644 index 000000000..8dd0d9b81 --- /dev/null +++ b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-inst/1762652580.137194", + "retrieved_timestamp": "1762652580.137195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gemma-2-2b-id-inst", + "developer": "google", + "inference_platform": "unknown", + "id": "dwikitheduck/gemma-2-2b-id-inst" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38785644312646006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39621721241423097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41542708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21733710106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json new file mode 100644 index 000000000..318d55a33 --- /dev/null +++ b/data/hfopenllm_v2/google/dwikitheduck/gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id/1762652580.136933", + "retrieved_timestamp": "1762652580.136933", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dwikitheduck/gemma-2-2b-id", + "developer": "google", + "inference_platform": "unknown", + "id": "dwikitheduck/gemma-2-2b-id" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38785644312646006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39621721241423097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41542708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21733710106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json new file mode 100644 index 000000000..f0b08fea4 --- /dev/null +++ b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9B-it-psy10k-mental_health/1762652580.139083", + "retrieved_timestamp": "1762652580.139084", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", + "developer": "google", + "inference_platform": "unknown", + "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5886658510529839 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5539376944027642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16314199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40860416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38289561170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json new file mode 100644 index 000000000..f85136e91 --- /dev/null +++ b/data/hfopenllm_v2/google/ehristoforu/Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9b-it-train6/1762652580.1393359", + "retrieved_timestamp": "1762652580.139337", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/Gemma2-9b-it-train6", + "developer": "google", + "inference_platform": "unknown", + "id": "ehristoforu/Gemma2-9b-it-train6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7025215317579578 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5898092579133603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40841666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39419880319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json b/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json new file mode 100644 index 000000000..052cbf5e4 --- /dev/null +++ b/data/hfopenllm_v2/google/ell44ot/gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ell44ot_gemma-2b-def/1762652580.147274", + "retrieved_timestamp": "1762652580.147275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ell44ot/gemma-2b-def", + "developer": "google", + "inference_platform": "unknown", + "id": "ell44ot/gemma-2b-def" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26930433472076315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31586532094752634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15724734042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaModel", + "params_billions": 1.546 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json b/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json new file mode 100644 index 000000000..2abcd5be2 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-base/1762652580.172907", + "retrieved_timestamp": "1762652580.172908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-base", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18907055501624578 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3525980599300322 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36711458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13572140957446807 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json b/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json new file mode 100644 index 000000000..a2ef83dd4 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-large/1762652580.173132", + "retrieved_timestamp": "1762652580.1731331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-large", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-large" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22009490374428736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41531150356794316 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40832291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17087765957446807 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.783 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json b/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json new file mode 100644 index 000000000..1158cb78a --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-small/1762652580.173366", + "retrieved_timestamp": "1762652580.173366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-small", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1524255641697363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3282901097640842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41229166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1233377659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 0.077 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json b/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json new file mode 100644 index 000000000..27e3e4c15 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.1738272", + "retrieved_timestamp": "1762652580.1738281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-xl", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-xl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2206944241279804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45372172155693963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634442 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21417885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 2.85 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json b/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json new file mode 100644 index 000000000..caaad62bc --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.173602", + "retrieved_timestamp": "1762652580.173603", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-xl", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-xl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22374189373085634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45310636062112314 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41809375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21467752659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 2.85 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json b/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json new file mode 100644 index 000000000..2476c4752 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-t5-xxl/1762652580.174026", + "retrieved_timestamp": "1762652580.174026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-t5-xxl", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-t5-xxl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2200450360598767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065888015776924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23429188829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 11.267 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json b/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json new file mode 100644 index 000000000..3323a8057 --- /dev/null +++ b/data/hfopenllm_v2/google/flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_flan-ul2/1762652580.174251", + "retrieved_timestamp": "1762652580.174251", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/flan-ul2", + "developer": "google", + "inference_platform": "unknown", + "id": "google/flan-ul2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23925406809487715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5053738049125648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3843541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24933510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 19.46 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json b/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json new file mode 100644 index 000000000..c113455c9 --- /dev/null +++ b/data/hfopenllm_v2/google/google/codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_codegemma-1.1-2b/1762652580.172607", + "retrieved_timestamp": "1762652580.172608", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/codegemma-1.1-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/codegemma-1.1-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22936253584932426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3353417790248454 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json b/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json new file mode 100644 index 000000000..dc3f1c19d --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-1.1-2b-it/1762652580.1745641", + "retrieved_timestamp": "1762652580.174565", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-1.1-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-1.1-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30674831668860847 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3184634974814922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14835438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json b/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json new file mode 100644 index 000000000..90fa3ab08 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-1.1-7b-it/1762652580.1748302", + "retrieved_timestamp": "1762652580.1748302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-1.1-7b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-1.1-7b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5039107346285633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935297962833251 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42302083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2583942819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json b/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json new file mode 100644 index 000000000..f4a1a2921 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-27b-it/1762652580.17537", + "retrieved_timestamp": "1762652580.175371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-27b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-27b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7977677008116243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6451387433168799 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23867069486404835 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40330208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4451462765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json b/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json new file mode 100644 index 000000000..59f8f5644 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-27b/1762652580.175144", + "retrieved_timestamp": "1762652580.175145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-27b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-27b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24752213017017072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5642908317482057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43963541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4370844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json b/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json new file mode 100644 index 000000000..c20168b19 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-it/1762652580.176172", + "retrieved_timestamp": "1762652580.176194", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5668337788179807 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41992308914274706 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39288541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25498670212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json new file mode 100644 index 000000000..26f68cc2e --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.1767948", + "retrieved_timestamp": "1762652580.176796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-2b-jpn-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-2b-jpn-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5288401441508531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4178440226217119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2466755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json new file mode 100644 index 000000000..de5c8169f --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.176506", + "retrieved_timestamp": "1762652580.176507", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-2b-jpn-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-2b-jpn-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077826832803628 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42255698900658106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39638541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2578125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json b/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json new file mode 100644 index 000000000..cf6465434 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.175597", + "retrieved_timestamp": "1762652580.1755981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19931226922343825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3655966996422591 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4231770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21800199468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json b/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json new file mode 100644 index 000000000..e9df8fe55 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.1759539", + "retrieved_timestamp": "1762652580.175955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20176021844262113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3708674612470255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22165890957446807 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json b/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json new file mode 100644 index 000000000..9a14c1bcd --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-9b-it/1762652580.177257", + "retrieved_timestamp": "1762652580.177258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7435626360279614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5990342504164132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875498670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json b/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json new file mode 100644 index 000000000..a19951205 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2-9b/1762652580.177011", + "retrieved_timestamp": "1762652580.177012", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20398320899657355 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5377373397621884 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4461145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103224734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json b/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json new file mode 100644 index 000000000..6e49827d8 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2b-it/1762652580.17777", + "retrieved_timestamp": "1762652580.17777", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26902950837112194 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31508191988788464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13530585106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json b/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json new file mode 100644 index 000000000..17adafbab --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-2b/1762652580.177512", + "retrieved_timestamp": "1762652580.177513", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20375825033134307 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33656381705857935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13655252659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json b/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json new file mode 100644 index 000000000..e18ad115d --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-7b-it/1762652580.178242", + "retrieved_timestamp": "1762652580.1782432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-7b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-7b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868324933398937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36459012743300967 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42742708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16946476063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json b/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json new file mode 100644 index 000000000..7b466c7d2 --- /dev/null +++ b/data/hfopenllm_v2/google/google/gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_gemma-7b/1762652580.1780128", + "retrieved_timestamp": "1762652580.178014", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/gemma-7b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/gemma-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2659321710838353 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43615285239286355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4062395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2947972074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GemmaForCausalLM", + "params_billions": 8.538 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json b/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json new file mode 100644 index 000000000..78e1bce35 --- /dev/null +++ b/data/hfopenllm_v2/google/google/recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b-it/1762652580.17961", + "retrieved_timestamp": "1762652580.179611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/recurrentgemma-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/recurrentgemma-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2949329999955673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33300047272606553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1402094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 2.683 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json b/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json new file mode 100644 index 000000000..c1103dbb4 --- /dev/null +++ b/data/hfopenllm_v2/google/google/recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b/1762652580.179393", + "retrieved_timestamp": "1762652580.179394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/recurrentgemma-2b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/recurrentgemma-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017028151970106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31973582830084474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3445729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11760305851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 2.683 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json b/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json new file mode 100644 index 000000000..7bb86b9ec --- /dev/null +++ b/data/hfopenllm_v2/google/google/recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b-it/1762652580.180049", + "retrieved_timestamp": "1762652580.18005", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/recurrentgemma-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "google/recurrentgemma-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5010383560065071 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4367189649027647 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43790625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2843251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json b/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json new file mode 100644 index 000000000..4218dda52 --- /dev/null +++ b/data/hfopenllm_v2/google/google/recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b/1762652580.17984", + "retrieved_timestamp": "1762652580.179841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/recurrentgemma-9b", + "developer": "google", + "inference_platform": "unknown", + "id": "google/recurrentgemma-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31159434744256354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39562568669428394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3802604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2604720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "RecurrentGemmaForCausalLM", + "params_billions": 9.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json new file mode 100644 index 000000000..46ab1077c --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v1-gemma2-9b-it/1762652580.1819131", + "retrieved_timestamp": "1762652580.1819131", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Gigantes-v1-gemma2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Gigantes-v1-gemma2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.692454908531585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.597792552822268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45547916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42253989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json new file mode 100644 index 000000000..81c4205cb --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v2-gemma2-9b-it/1762652580.182155", + "retrieved_timestamp": "1762652580.182156", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Gigantes-v2-gemma2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Gigantes-v2-gemma2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7350696152874374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5986559388303995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45947916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259474734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json b/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json new file mode 100644 index 000000000..c4a70ca16 --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v3-gemma2-9b-it/1762652580.182362", + "retrieved_timestamp": "1762652580.1823628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Gigantes-v3-gemma2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Gigantes-v3-gemma2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.697625633319592 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5983513792324827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4608125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json new file mode 100644 index 000000000..a1751ce93 --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v1-Gemma2-8k-9B/1762652580.1841059", + "retrieved_timestamp": "1762652580.1841059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v1-Gemma2-8k-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v1-Gemma2-8k-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35308536904302806 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5589031767575711 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16842900302114805 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46446875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4242021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json new file mode 100644 index 000000000..86d80bc0f --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-Gemma2-8k-9B/1762652580.184566", + "retrieved_timestamp": "1762652580.184567", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v2-Gemma2-8k-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v2-Gemma2-8k-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7384417789243651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6015773428405322 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2280966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44884375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4331781914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json b/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json new file mode 100644 index 000000000..221bfc080 --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-Gemma2-8k-9B/1762652580.1850398", + "retrieved_timestamp": "1762652580.185041", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v3-Gemma2-8k-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v3-Gemma2-8k-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7378422585406721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6015406636327695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23187311178247735 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4488125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43367686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json b/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json new file mode 100644 index 000000000..f4421a1c9 --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magot-v1-Gemma2-8k-9B/1762652580.185666", + "retrieved_timestamp": "1762652580.185667", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magot-v1-Gemma2-8k-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Magot-v1-Gemma2-8k-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29967818720993633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6019447732218105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44884375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43367686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json b/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json new file mode 100644 index 000000000..c4f29f48b --- /dev/null +++ b/data/hfopenllm_v2/google/grimjim/Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magot-v2-Gemma2-8k-9B/1762652580.185882", + "retrieved_timestamp": "1762652580.1858828", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magot-v2-Gemma2-8k-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "grimjim/Magot-v2-Gemma2-8k-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7347449212533854 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5896713649821103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4222905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json new file mode 100644 index 000000000..beb48a038 --- /dev/null +++ b/data/hfopenllm_v2/google/hotmailuser/Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2Crono-27B/1762652580.193866", + "retrieved_timestamp": "1762652580.193866", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Gemma2Crono-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "hotmailuser/Gemma2Crono-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7086164709637096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6505341690680219 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37080536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45668749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4632646276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json new file mode 100644 index 000000000..9d2f8db92 --- /dev/null +++ b/data/hfopenllm_v2/google/hotmailuser/Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2SimPO-27B/1762652580.194106", + "retrieved_timestamp": "1762652580.1941068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Gemma2SimPO-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "hotmailuser/Gemma2SimPO-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7222303488078299 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6413158976157102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28172205438066467 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44465625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46417885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json new file mode 100644 index 000000000..effa3723a --- /dev/null +++ b/data/hfopenllm_v2/google/hotmailuser/Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2atlas-27B/1762652580.1943119", + "retrieved_timestamp": "1762652580.194313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Gemma2atlas-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "hotmailuser/Gemma2atlas-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7213560020744957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6544960921220462 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44453125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4749833776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json b/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json new file mode 100644 index 000000000..ec055e1f1 --- /dev/null +++ b/data/hfopenllm_v2/google/hotmailuser/Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2magnum-27b/1762652580.1945128", + "retrieved_timestamp": "1762652580.194514", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Gemma2magnum-27b", + "developer": "google", + "inference_platform": "unknown", + "id": "hotmailuser/Gemma2magnum-27b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5050599077115387 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6199590493843724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47234375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45960771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json b/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json new file mode 100644 index 000000000..2d47c7cc2 --- /dev/null +++ b/data/hfopenllm_v2/google/ifable/gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ifable_gemma-2-Ifable-9B/1762652580.225604", + "retrieved_timestamp": "1762652580.225605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ifable/gemma-2-Ifable-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "ifable/gemma-2-Ifable-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2984292787581395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5866115556693244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40525000000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json b/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json new file mode 100644 index 000000000..90a53be85 --- /dev/null +++ b/data/hfopenllm_v2/google/jebish7/gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-2b-it/1762652580.2824588", + "retrieved_timestamp": "1762652580.2824588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/gemma-2-2b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "jebish7/gemma-2-2b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12717035244263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43951564907099594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42444791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27152593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json b/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json new file mode 100644 index 000000000..8a9b186c5 --- /dev/null +++ b/data/hfopenllm_v2/google/jebish7/gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-9b-it/1762652580.282719", + "retrieved_timestamp": "1762652580.28272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/gemma-2-9b-it", + "developer": "google", + "inference_platform": "unknown", + "id": "jebish7/gemma-2-9b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1557467519514887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5949210568047724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4554479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414311835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json new file mode 100644 index 000000000..bd5c56feb --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-9B/1762652580.31483", + "retrieved_timestamp": "1762652580.314831", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3008772279773224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5931298417725773 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json new file mode 100644 index 000000000..b77ef0acc --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Advanced-9B/1762652580.315091", + "retrieved_timestamp": "1762652580.315092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5515964308036011 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5889067263184956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3760729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4243683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json new file mode 100644 index 000000000..0e39f1369 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Remix-9B/1762652580.31531", + "retrieved_timestamp": "1762652580.3153112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-Remix-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7083416446140685 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5892021015046846 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42386968085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json new file mode 100644 index 000000000..07a4545a4 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2-9B/1762652580.315539", + "retrieved_timestamp": "1762652580.31554", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21362429464930827 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5765835815625312 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34838541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.422124335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json new file mode 100644 index 000000000..cd1dd636a --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2a-9B/1762652580.315754", + "retrieved_timestamp": "1762652580.315755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v2a-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15946909755005606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.518248966271832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31647916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35147938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json new file mode 100644 index 000000000..1323909e0 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2f-9B/1762652580.315967", + "retrieved_timestamp": "1762652580.315968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v2f-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37911408396388246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5192845467961766 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3231458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3503158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json new file mode 100644 index 000000000..e367ecf9d --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/1762652580.316169", + "retrieved_timestamp": "1762652580.316169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6601816513517467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5935146853737787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44496874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41963098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json new file mode 100644 index 000000000..643f90267 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3b-9B/1762652580.3163798", + "retrieved_timestamp": "1762652580.316381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v3b-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6809144181881852 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5907698162898164 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44887499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4204621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json new file mode 100644 index 000000000..722bad59f --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3i-9B/1762652580.3165948", + "retrieved_timestamp": "1762652580.316596", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v3i-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203047912871182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5625750779805955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31806249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41663896276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json new file mode 100644 index 000000000..137075982 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3j-9B/1762652580.3168168", + "retrieved_timestamp": "1762652580.316818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v3j-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4169326276501904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5632286961183511 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41339760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json new file mode 100644 index 000000000..780ec306b --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/1762652580.317157", + "retrieved_timestamp": "1762652580.3171608", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7015474496558022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6023627309683861 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4580520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4366688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json new file mode 100644 index 000000000..848eef8cc --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/1762652580.317515", + "retrieved_timestamp": "1762652580.317516", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7135123694020753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.598838715496553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json new file mode 100644 index 000000000..a6d98fe50 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4b-9B/1762652580.317803", + "retrieved_timestamp": "1762652580.317804", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v4b-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6878338364428604 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6039158192304305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45547916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4356715425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json new file mode 100644 index 000000000..4bc386bd5 --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4c-9B/1762652580.318075", + "retrieved_timestamp": "1762652580.318076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v4c-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6945282960323054 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6084319292299174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45278124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43949468085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json new file mode 100644 index 000000000..ad5a8c68d --- /dev/null +++ b/data/hfopenllm_v2/google/lemon07r/Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4d-9B/1762652580.318495", + "retrieved_timestamp": "1762652580.318496", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Gemma-2-Ataraxy-v4d-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7250029920610646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6054158192304304 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4541458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345910904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json b/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json new file mode 100644 index 000000000..b7d2d0d7d --- /dev/null +++ b/data/hfopenllm_v2/google/lkoenig/BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_200_Gemma/1762652580.32272", + "retrieved_timestamp": "1762652580.32272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_200_Gemma", + "developer": "google", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_200_Gemma" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07051733843978422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3449044607726533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36311458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16788563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 19.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json b/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json new file mode 100644 index 000000000..fc07be68f --- /dev/null +++ b/data/hfopenllm_v2/google/monsterapi/gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/1762652580.372597", + "retrieved_timestamp": "1762652580.372598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", + "developer": "google", + "inference_platform": "unknown", + "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3902545246612322 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36496861927498697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3643854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19872007978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json b/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json new file mode 100644 index 000000000..7a1044fc2 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_mt5-base/1762652580.178463", + "retrieved_timestamp": "1762652580.178463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/mt5-base", + "developer": "google", + "inference_platform": "unknown", + "id": "google/mt5-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1645157072124186 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28831600228488835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36720833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10696476063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 0.39 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json b/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json new file mode 100644 index 000000000..0bf4ff92a --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_mt5-small/1762652580.1787279", + "retrieved_timestamp": "1762652580.178729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/mt5-small", + "developer": "google", + "inference_platform": "unknown", + "id": "google/mt5-small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17180968718555653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2765842029929075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 0.17 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json b/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json new file mode 100644 index 000000000..d9ac4c2e5 --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_mt5-xl/1762652580.17897", + "retrieved_timestamp": "1762652580.1789708", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/mt5-xl", + "developer": "google", + "inference_platform": "unknown", + "id": "google/mt5-xl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19596448534333347 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.304735837080435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3795208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11195146276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MT5ForConditionalGeneration", + "params_billions": 3.23 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json b/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json new file mode 100644 index 000000000..ddbda16ff --- /dev/null +++ b/data/hfopenllm_v2/google/mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_mt5-xxl/1762652580.1791801", + "retrieved_timestamp": "1762652580.1791801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/mt5-xxl", + "developer": "google", + "inference_platform": "unknown", + "id": "google/mt5-xxl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23575668116154028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2959344159116905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36894791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10887632978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "T5ForConditionalGeneration", + "params_billions": 11.9 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json b/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json new file mode 100644 index 000000000..86a654c2a --- /dev/null +++ b/data/hfopenllm_v2/google/nbeerbower/Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Gemma2-Gutenberg-Doppel-9B/1762652580.378716", + "retrieved_timestamp": "1762652580.378717", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7171094917042337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5870114193661848 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19788519637462235 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46078125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41273271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json new file mode 100644 index 000000000..54c32c119 --- /dev/null +++ b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-27B/1762652580.384448", + "retrieved_timestamp": "1762652580.3844512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/gemma2-gutenberg-27B", + "developer": "google", + "inference_platform": "unknown", + "id": "nbeerbower/gemma2-gutenberg-27B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29470804133033685 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37965683503451614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3727291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19822140957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 27.227 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json new file mode 100644 index 000000000..ff31bbd24 --- /dev/null +++ b/data/hfopenllm_v2/google/nbeerbower/gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-9B/1762652580.384712", + "retrieved_timestamp": "1762652580.384713", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/gemma2-gutenberg-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "nbeerbower/gemma2-gutenberg-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2795948084416016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5950904001490335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45951041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4192154255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json new file mode 100644 index 000000000..de663d3ec --- /dev/null +++ b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241029_1532/1762652580.4059799", + "retrieved_timestamp": "1762652580.4059808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nhyha/N3N_gemma-2-9b-it_20241029_1532", + "developer": "google", + "inference_platform": "unknown", + "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6751940407008958 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5863124381827675 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4593541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4122340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json new file mode 100644 index 000000000..9ec2d6cd1 --- /dev/null +++ b/data/hfopenllm_v2/google/nhyha/N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241110_2026/1762652580.406234", + "retrieved_timestamp": "1762652580.406235", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nhyha/N3N_gemma-2-9b-it_20241110_2026", + "developer": "google", + "inference_platform": "unknown", + "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6282829558903709 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5867149609980419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1608761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40201130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json b/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json new file mode 100644 index 000000000..4268cf870 --- /dev/null +++ b/data/hfopenllm_v2/google/nidum/Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nidum_Nidum-Limitless-Gemma-2B/1762652580.406632", + "retrieved_timestamp": "1762652580.406633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nidum/Nidum-Limitless-Gemma-2B", + "developer": "google", + "inference_platform": "unknown", + "id": "nidum/Nidum-Limitless-Gemma-2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24235140538216376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3078801520076317 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11735372340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 2.506 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json b/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json new file mode 100644 index 000000000..9cc58ee0f --- /dev/null +++ b/data/hfopenllm_v2/google/noname0202/gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-2b-it-ties/1762652580.4097438", + "retrieved_timestamp": "1762652580.409745", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/gemma-2-2b-it-ties", + "developer": "google", + "inference_platform": "unknown", + "id": "noname0202/gemma-2-2b-it-ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12657083205893696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42057403060290816 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39288541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2560671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json b/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json new file mode 100644 index 000000000..d17aed9d2 --- /dev/null +++ b/data/hfopenllm_v2/google/princeton-nlp/gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-SimPO/1762652580.454763", + "retrieved_timestamp": "1762652580.4547682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/gemma-2-9b-it-SimPO", + "developer": "google", + "inference_platform": "unknown", + "id": "princeton-nlp/gemma-2-9b-it-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3206857803960159 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5839179923162123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41232291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39752327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json b/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json new file mode 100644 index 000000000..1a960d5f0 --- /dev/null +++ b/data/hfopenllm_v2/google/qq8933/OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qq8933_OpenLongCoT-Base-Gemma2-2B/1762652580.488883", + "retrieved_timestamp": "1762652580.488883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qq8933/OpenLongCoT-Base-Gemma2-2B", + "developer": "google", + "inference_platform": "unknown", + "id": "qq8933/OpenLongCoT-Base-Gemma2-2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1965141380426158 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3106362870893106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32225 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1315658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.204 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json new file mode 100644 index 000000000..13538a715 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491333", + "retrieved_timestamp": "1762652580.491333", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7648949232480928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.597438766061506 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207114361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json new file mode 100644 index 000000000..b6c5609aa --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491603", + "retrieved_timestamp": "1762652580.491603", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28536505361330156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5983926033872208 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46065625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4162234042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json new file mode 100644 index 000000000..70a862a77 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.1/1762652580.491797", + "retrieved_timestamp": "1762652580.491798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.751506004069203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5995309756292291 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4158909574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json new file mode 100644 index 000000000..3e67a6fa1 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.4922318", + "retrieved_timestamp": "1762652580.492233", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.2", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2746989100032359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6030832642626502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46859375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4122340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json new file mode 100644 index 000000000..dc617e475 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.492019", + "retrieved_timestamp": "1762652580.49202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.2", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7591745457608035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6025964285724085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.409875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41630651595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json new file mode 100644 index 000000000..19c46567c --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492416", + "retrieved_timestamp": "1762652580.492416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.3", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.743937197746424 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5992527878628748 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072473404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json new file mode 100644 index 000000000..9dee92018 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492666", + "retrieved_timestamp": "1762652580.492667", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.3", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.57607592299543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6019827101058847 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46322916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4039228723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json new file mode 100644 index 000000000..26aa56d1c --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.4/1762652580.4928808", + "retrieved_timestamp": "1762652580.492882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.4", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2561891337207498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5967285833554881 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4726875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4405751329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json new file mode 100644 index 000000000..003747655 --- /dev/null +++ b/data/hfopenllm_v2/google/recoilme/recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.5/1762652580.4931269", + "retrieved_timestamp": "1762652580.493134", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "recoilme/recoilme-gemma-2-9B-v0.5", + "developer": "google", + "inference_platform": "unknown", + "id": "recoilme/recoilme-gemma-2-9B-v0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7664186580495308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5981472549925003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4231770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41996343085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json b/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json new file mode 100644 index 000000000..b7e8a93e4 --- /dev/null +++ b/data/hfopenllm_v2/google/sequelbox/gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_gemma-2-9B-MOTH/1762652580.5126731", + "retrieved_timestamp": "1762652580.512674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/gemma-2-9B-MOTH", + "developer": "google", + "inference_platform": "unknown", + "id": "sequelbox/gemma-2-9B-MOTH" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20588150551647405 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30797000521562534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3409479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11402925531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json b/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json new file mode 100644 index 000000000..2b4e27390 --- /dev/null +++ b/data/hfopenllm_v2/google/switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_switch-base-8/1762652580.180255", + "retrieved_timestamp": "1762652580.180256", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/switch-base-8", + "developer": "google", + "inference_platform": "unknown", + "id": "google/switch-base-8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15852050337548815 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28763132730669333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35173958333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10979055851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "SwitchTransformersForConditionalGeneration", + "params_billions": 0.62 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json b/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json new file mode 100644 index 000000000..0dffb525d --- /dev/null +++ b/data/hfopenllm_v2/google/umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/google_umt5-base/1762652580.180466", + "retrieved_timestamp": "1762652580.180467", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "google/umt5-base", + "developer": "google", + "inference_platform": "unknown", + "id": "google/umt5-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.174632198123202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27877262328945457 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33821875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10779587765957446 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "UMT5ForConditionalGeneration", + "params_billions": -1.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json b/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json new file mode 100644 index 000000000..74263acd9 --- /dev/null +++ b/data/hfopenllm_v2/google/wzhouad/gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wzhouad_gemma-2-9b-it-WPO-HB/1762652580.596365", + "retrieved_timestamp": "1762652580.5963662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wzhouad/gemma-2-9b-it-WPO-HB", + "developer": "google", + "inference_platform": "unknown", + "id": "wzhouad/gemma-2-9b-it-WPO-HB" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5437029304467702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5628624376751974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498322147651007 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33602061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json new file mode 100644 index 000000000..2cd22138a --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/1762652580.609323", + "retrieved_timestamp": "1762652580.609324", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5218209905273563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.414688942270627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24609375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json new file mode 100644 index 000000000..20e1bc4ed --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/1762652580.6090298", + "retrieved_timestamp": "1762652580.609031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4630945890237902 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4052902505118913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3754270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23445811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json new file mode 100644 index 000000000..1d85b8c3b --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/1762652580.609858", + "retrieved_timestamp": "1762652580.609859", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.505484337114412 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38123590457353557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2282247340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json new file mode 100644 index 000000000..aac637ccc --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/1762652580.6102881", + "retrieved_timestamp": "1762652580.6102889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30647349033896726 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40715971926711275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39691666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2249002659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json new file mode 100644 index 000000000..195b07441 --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/1762652580.610075", + "retrieved_timestamp": "1762652580.610076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47478468242042227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38979797271028965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37676041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21908244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json new file mode 100644 index 000000000..7e09f976f --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17/1762652580.609628", + "retrieved_timestamp": "1762652580.609628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5081572449988254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40762664531580056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37006249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2455119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json new file mode 100644 index 000000000..5f7750677 --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/1762652580.610698", + "retrieved_timestamp": "1762652580.610699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47423502972113984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40389353402379324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3953333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21850066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json new file mode 100644 index 000000000..2314c9e0c --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18/1762652580.610494", + "retrieved_timestamp": "1762652580.610495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5175246124726836 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4132188791645781 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37415624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25049867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json new file mode 100644 index 000000000..74e31d307 --- /dev/null +++ b/data/hfopenllm_v2/google/ymcki/gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-24/1762652580.610902", + "retrieved_timestamp": "1762652580.610903", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/gemma-2-2b-jpn-it-abliterated-24", + "developer": "google", + "inference_platform": "unknown", + "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49786566310722213 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41096027770392857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2473404255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json b/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json new file mode 100644 index 000000000..d820e36bf --- /dev/null +++ b/data/hfopenllm_v2/google/zake7749/gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-9b-it-chinese-kyara/1762652580.612564", + "retrieved_timestamp": "1762652580.612565", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zake7749/gemma-2-9b-it-chinese-kyara", + "developer": "google", + "inference_platform": "unknown", + "id": "zake7749/gemma-2-9b-it-chinese-kyara" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17642965110351644 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5953692987878404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4241979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41788563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json b/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json new file mode 100644 index 000000000..382c5b725 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Gemma-2-TM-9B/1762652580.612811", + "retrieved_timestamp": "1762652580.612811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Gemma-2-TM-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/Gemma-2-TM-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8044621604010691 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5986592993557701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41523958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40882646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json b/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json new file mode 100644 index 000000000..b97807cd8 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen1-gemma-2-9B/1762652580.613055", + "retrieved_timestamp": "1762652580.613056", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7886252920029965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6099997385328262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22205438066465258 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json b/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json new file mode 100644 index 000000000..e3a3d4360 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-GI-gemma-2-9B/1762652580.613308", + "retrieved_timestamp": "1762652580.613309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen2-GI-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen2-GI-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7913979352562313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6095558882654465 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42832291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43558843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json b/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json new file mode 100644 index 000000000..00e616cc7 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-gemma-2-9B/1762652580.613527", + "retrieved_timestamp": "1762652580.613528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7907485471881275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6100494662695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4322916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json b/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json new file mode 100644 index 000000000..7de311dfe --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen3-gemma-2-9B/1762652580.613742", + "retrieved_timestamp": "1762652580.613743", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8020142111818863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6097112889343964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43558843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json b/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json new file mode 100644 index 000000000..e8088598e --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen4-gemma-2-9B/1762652580.613958", + "retrieved_timestamp": "1762652580.6139588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7883005979689446 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6109884725351095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22356495468277945 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json b/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json new file mode 100644 index 000000000..3f2ca2a0c --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen5-gemma-2-9B/1762652580.614163", + "retrieved_timestamp": "1762652580.614163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7923221496739761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6132787046647334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42016666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4402426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json b/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json new file mode 100644 index 000000000..a341a3170 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6-gemma-2-9B/1762652580.614364", + "retrieved_timestamp": "1762652580.6143649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen6-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen6-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1615668648075994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5844669261858688 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40692708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4165558510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json b/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json new file mode 100644 index 000000000..e31b363e1 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6fix-gemma-2-9B/1762652580.614617", + "retrieved_timestamp": "1762652580.614618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen6fix-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen6fix-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15759518078697854 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5917309697578781 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40841666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4119847074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json b/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json new file mode 100644 index 000000000..6022ec690 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen7-gemma-2-9B/1762652580.614857", + "retrieved_timestamp": "1762652580.614858", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Gen7-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Gen7-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16641289556155447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5935242633580781 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4122340425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json b/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json new file mode 100644 index 000000000..0e2e8786d --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/1762652580.6150799", + "retrieved_timestamp": "1762652580.615081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7907485471881275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6142243374633075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json b/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json new file mode 100644 index 000000000..fba2dc13c --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge-gemma-2-9B/1762652580.615297", + "retrieved_timestamp": "1762652580.615297", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8035379459833243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6118379158679297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815436241610737 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43617021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json b/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json new file mode 100644 index 000000000..20019f265 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge1-gemma-2-9B/1762652580.615506", + "retrieved_timestamp": "1762652580.615506", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7901490268044344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6099997385328262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22885196374622357 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4243854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43741688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json b/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json new file mode 100644 index 000000000..9c3466b8e --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/1762652580.615718", + "retrieved_timestamp": "1762652580.615718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7955945779420825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.60838922159878 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43222916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.437250664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json b/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json new file mode 100644 index 000000000..483bd8027 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-gemma-2-9B/1762652580.615932", + "retrieved_timestamp": "1762652580.615933", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7877010775852515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6106681877306871 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348942598187311 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json b/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json new file mode 100644 index 000000000..bbb9c2c47 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge3-gemma-2-9B/1762652580.6161401", + "retrieved_timestamp": "1762652580.616141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7858526487497617 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6102112889343964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4373337765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json b/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json new file mode 100644 index 000000000..9b8f5be8f --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge4-gemma-2-9B/1762652580.616342", + "retrieved_timestamp": "1762652580.616342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7807317916461656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6118218058684427 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42943749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43899601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json b/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json new file mode 100644 index 000000000..a460e1d64 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge5-gemma-2-9B/1762652580.616543", + "retrieved_timestamp": "1762652580.616544", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7843787816327346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6122674386670167 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42813541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json b/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json new file mode 100644 index 000000000..d86f6b21d --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge6-gemma-2-9B/1762652580.6167512", + "retrieved_timestamp": "1762652580.6167512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-Merge6-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-Merge6-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16946036516443036 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5949106849534558 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41148603723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json b/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json new file mode 100644 index 000000000..47a3fa2f1 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT-gemma-2-9B/1762652580.616956", + "retrieved_timestamp": "1762652580.616957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7968434863938794 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6063604478633632 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40711458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42237367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json new file mode 100644 index 000000000..6755168ff --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen1-gemma-2-9B/1762652580.617173", + "retrieved_timestamp": "1762652580.617174", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7974430067775724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6117787046647335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43095833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43758311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json new file mode 100644 index 000000000..73d06534a --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen2-gemma-2-9B/1762652580.617375", + "retrieved_timestamp": "1762652580.617376", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7983672211953173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6095989894691557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22507552870090636 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42835416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43550531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json new file mode 100644 index 000000000..293264853 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen3-gemma-2-9B/1762652580.617578", + "retrieved_timestamp": "1762652580.617579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.795969139660545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6101551392017761 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json new file mode 100644 index 000000000..b2730bf8e --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen4-gemma-2-9B/1762652580.617781", + "retrieved_timestamp": "1762652580.617782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7941207108250552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6057567677609054 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21601208459214502 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42311458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42860704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json new file mode 100644 index 000000000..cf823339b --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/1762652580.617982", + "retrieved_timestamp": "1762652580.6179829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7929216700576691 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6000001533684681 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42179188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json new file mode 100644 index 000000000..54c5be383 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-gemma-2-9B/1762652580.618199", + "retrieved_timestamp": "1762652580.6182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7794828831943688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6017455017631886 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20770392749244712 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41914583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42220744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json new file mode 100644 index 000000000..a585f0f12 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen6-gemma-2-9B/1762652580.618452", + "retrieved_timestamp": "1762652580.618453", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen6-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen6-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16336542595867853 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5943545352208355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40444791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4133144946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json b/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json new file mode 100644 index 000000000..7c6afdc0d --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen7-gemma-2-9B/1762652580.6186602", + "retrieved_timestamp": "1762652580.6186612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Gen7-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Gen7-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16336542595867853 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5937953240176393 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41111458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json b/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json new file mode 100644 index 000000000..50563e4b8 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/1762652580.618859", + "retrieved_timestamp": "1762652580.61886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7928718023732585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6122674386670167 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json b/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json new file mode 100644 index 000000000..6de14f542 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT1-gemma-2-9B/1762652580.619083", + "retrieved_timestamp": "1762652580.6190841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7946703635243377 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6108745950756924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22356495468277945 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43222916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4357546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json new file mode 100644 index 000000000..c7da51e46 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen1-gemma-2-9B/1762652580.619495", + "retrieved_timestamp": "1762652580.619499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7855778224001206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6100802027920743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4376662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json new file mode 100644 index 000000000..11b5d7768 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen2-gemma-2-9B/1762652580.6198761", + "retrieved_timestamp": "1762652580.619877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889001183526376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6092917531936446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43882978723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json new file mode 100644 index 000000000..825d5351f --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen3-gemma-2-9B/1762652580.620111", + "retrieved_timestamp": "1762652580.620112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7810066179958066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6104772065373926 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43741688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json new file mode 100644 index 000000000..eb3382521 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen4-gemma-2-9B/1762652580.620331", + "retrieved_timestamp": "1762652580.620331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7895993741051521 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.609655139201776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22356495468277945 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41254166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43209773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json new file mode 100644 index 000000000..fbd77c18b --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen5-gemma-2-9B/1762652580.6205592", + "retrieved_timestamp": "1762652580.6205592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7749116787900548 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6063933817527739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42441666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43018617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json new file mode 100644 index 000000000..a54c87074 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen6-gemma-2-9B/1762652580.620769", + "retrieved_timestamp": "1762652580.620769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen6-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen6-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16641289556155447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.595964957637105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41371874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42096077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json b/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json new file mode 100644 index 000000000..9a38a2b48 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen7-gemma-2-9B/1762652580.621203", + "retrieved_timestamp": "1762652580.621205", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Gen7-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Gen7-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17615482475387528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6078922830693557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42032291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4311003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json b/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json new file mode 100644 index 000000000..142b173c5 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/1762652580.6214652", + "retrieved_timestamp": "1762652580.6214678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7901490268044344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6108461203950706 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42283333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4390791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json b/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json new file mode 100644 index 000000000..4541304e5 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT2-gemma-2-9B/1762652580.6217349", + "retrieved_timestamp": "1762652580.621736", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7885754243185858 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.611511004530543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43683510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json new file mode 100644 index 000000000..13c215397 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen1-gemma-2-9B/1762652580.6219652", + "retrieved_timestamp": "1762652580.6219661", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7837792612490415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6106760932030332 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41511458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43267952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json new file mode 100644 index 000000000..f1c6bae89 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen2-gemma-2-9B/1762652580.622196", + "retrieved_timestamp": "1762652580.622197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7843289139483238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6091473194676166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22356495468277945 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41111458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43326130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json new file mode 100644 index 000000000..12159a64f --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen3-gemma-2-9B/1762652580.622438", + "retrieved_timestamp": "1762652580.622439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7856276900845313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6088892215987798 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21525679758308158 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4302692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json new file mode 100644 index 000000000..c229f051a --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen4-gemma-2-9B/1762652580.6226869", + "retrieved_timestamp": "1762652580.622689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7737126380226687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6100843629460684 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20619335347432025 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4476354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json new file mode 100644 index 000000000..479c08a33 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B/1762652580.622956", + "retrieved_timestamp": "1762652580.622956", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7990166092634211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6098615465467813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41911458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43168218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json new file mode 100644 index 000000000..27a4fac29 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B_v1/1762652580.623179", + "retrieved_timestamp": "1762652580.623179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen5-gemma-2-9B_v1", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen5-gemma-2-9B_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7996161296471141 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6113330718661595 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359208776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json b/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json new file mode 100644 index 000000000..8904c0b51 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen6-gemma-2-9B/1762652580.623395", + "retrieved_timestamp": "1762652580.623395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Gen6-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Gen6-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17615482475387528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6020072592121909 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4125729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41023936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json b/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json new file mode 100644 index 000000000..c97bd6912 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/1762652580.623601", + "retrieved_timestamp": "1762652580.623602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17615482475387528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6123461203950705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42546875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4389128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json b/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json new file mode 100644 index 000000000..3e69d166c --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT3-gemma-2-9B/1762652580.623819", + "retrieved_timestamp": "1762652580.62382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7786085364610345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.61307842026088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4242916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43267952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json new file mode 100644 index 000000000..8b337d546 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen1-gemma-2-9B/1762652580.624031", + "retrieved_timestamp": "1762652580.624032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7894996387363307 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6093827996028333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43222916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4389128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json new file mode 100644 index 000000000..204c5eade --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen2-gemma-2-9B/1762652580.6242292", + "retrieved_timestamp": "1762652580.62423", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8050616807847621 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6108348543973539 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4367519946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json new file mode 100644 index 000000000..9f3bd3e13 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen3-gemma-2-9B/1762652580.624489", + "retrieved_timestamp": "1762652580.62449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7840540875986826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6087112889343964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json new file mode 100644 index 000000000..638b932fc --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen4-gemma-2-9B/1762652580.6246998", + "retrieved_timestamp": "1762652580.624701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7874262512356104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6076031496231499 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42435416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4323470744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json b/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json new file mode 100644 index 000000000..01a571449 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen5-gemma-2-9B/1762652580.6249092", + "retrieved_timestamp": "1762652580.62491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7788833628106757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6106664051994928 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42683333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43841422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json b/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json new file mode 100644 index 000000000..17a26f90b --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/1762652580.625107", + "retrieved_timestamp": "1762652580.625107", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1770790391716202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6120127870617372 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4390791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json b/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json new file mode 100644 index 000000000..3bed200d4 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT4-gemma-2-9B/1762652580.62533", + "retrieved_timestamp": "1762652580.625331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7761605872418517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.607313601341302 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43092708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43658577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json new file mode 100644 index 000000000..e8cc60221 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen1-gemma-2-9B/1762652580.625538", + "retrieved_timestamp": "1762652580.625539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Gen1-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Gen1-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7831298731809377 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6110476837383056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43683510638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json new file mode 100644 index 000000000..d5d80e8e6 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen2-gemma-2-9B/1762652580.625738", + "retrieved_timestamp": "1762652580.625739", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Gen2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Gen2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7962439660101863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.610541261742359 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41629166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4379155585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json new file mode 100644 index 000000000..585f25eb3 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen3-gemma-2-9B/1762652580.625941", + "retrieved_timestamp": "1762652580.625942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Gen3-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Gen3-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825303527972447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6090494662695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42305208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json new file mode 100644 index 000000000..b6a3c30ec --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen4-gemma-2-9B/1762652580.62615", + "retrieved_timestamp": "1762652580.6261508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Gen4-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Gen4-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7834545672149895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6131056160021203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42283333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4396609042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json b/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json new file mode 100644 index 000000000..cfbcae770 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen5-gemma-2-9B/1762652580.6263602", + "retrieved_timestamp": "1762652580.6263611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Gen5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Gen5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7947202312087482 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6111664051994928 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815436241610737 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41911458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43292885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json b/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json new file mode 100644 index 000000000..6abf38344 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/1762652580.62657", + "retrieved_timestamp": "1762652580.62657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17615482475387528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6126794537284038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4227708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43899601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json b/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json new file mode 100644 index 000000000..d8a5be6e5 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MT5-gemma-2-9B/1762652580.6267788", + "retrieved_timestamp": "1762652580.6267798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MT5-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MT5-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8047868544351211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6112225549321132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4366688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json b/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json new file mode 100644 index 000000000..e95e65d61 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MTM-Merge-gemma-2-9B/1762652580.626984", + "retrieved_timestamp": "1762652580.626985", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MTM-Merge-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MTM-Merge-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7798075772284205 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6133348543973538 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43882978723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json b/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json new file mode 100644 index 000000000..98933922b --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/1762652580.627192", + "retrieved_timestamp": "1762652580.627192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17860277397305815 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6116794537284039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42410416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json new file mode 100644 index 000000000..0a77c0acf --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/1762652580.627404", + "retrieved_timestamp": "1762652580.627404", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7496575752337131 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6069712638522043 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43092708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44007646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json new file mode 100644 index 000000000..7c86ae516 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/1762652580.627618", + "retrieved_timestamp": "1762652580.627619", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7646200968984517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6097862253440982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20694864048338368 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43467420212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json b/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json new file mode 100644 index 000000000..5b83fe416 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4MT4g2-gemma-2-9B/1762652580.627839", + "retrieved_timestamp": "1762652580.62784", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Rv0.4MT4g2-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/Rv0.4MT4g2-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7320221456845614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.604119644415618 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44173869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json b/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json new file mode 100644 index 000000000..85226854a --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_T31122024203920-gemma-2-9B/1762652580.628056", + "retrieved_timestamp": "1762652580.628057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/T31122024203920-gemma-2-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/T31122024203920-gemma-2-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676176988169169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6095634089448112 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4321979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.437250664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json b/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json new file mode 100644 index 000000000..b85e98265 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054t0.5_gemma-2/1762652580.628514", + "retrieved_timestamp": "1762652580.628514", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Test01012025155054t0.5_gemma-2", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/Test01012025155054t0.5_gemma-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555229014570229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28295044895258115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10904255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.817 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json b/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json new file mode 100644 index 000000000..f85ad77ed --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_gemma-2-S2MTM-9B/1762652580.628712", + "retrieved_timestamp": "1762652580.628713", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/gemma-2-S2MTM-9B", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/gemma-2-S2MTM-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7822555264476034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6060836790982922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4296875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json new file mode 100644 index 000000000..8a8858e61 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/1762652580.630025", + "retrieved_timestamp": "1762652580.630029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7706651684197928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6075432245295168 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43226041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json new file mode 100644 index 000000000..8e7fd6e65 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/1762652580.630381", + "retrieved_timestamp": "1762652580.630382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7208063493752133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5995203934792884 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498322147651007 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4140625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json new file mode 100644 index 000000000..99d5c29b1 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/1762652580.628911", + "retrieved_timestamp": "1762652580.6289122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7648949232480928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6074511952177571 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2280966767371601 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498322147651007 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43209773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json new file mode 100644 index 000000000..178bd494c --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/1762652580.6306539", + "retrieved_timestamp": "1762652580.6306539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.759999024809727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6066260664115647 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815436241610737 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4109583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43226396276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json new file mode 100644 index 000000000..217203e5f --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/1762652580.630864", + "retrieved_timestamp": "1762652580.6308649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7615227596111651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6098779556010631 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43102083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4315159574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json new file mode 100644 index 000000000..b3e5918a4 --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/1762652580.6310751", + "retrieved_timestamp": "1762652580.631076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7943955371746965 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6064399292200404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42022916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4323470744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json new file mode 100644 index 000000000..79b952a8e --- /dev/null +++ b/data/hfopenllm_v2/google/zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/1762652580.631496", + "retrieved_timestamp": "1762652580.631499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", + "developer": "google", + "inference_platform": "unknown", + "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.744536718130117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.597759349920723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42946875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41805186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json b/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json new file mode 100644 index 000000000..0f0b83177 --- /dev/null +++ b/data/hfopenllm_v2/goulue5/merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/goulue5_merging_LLM/1762652580.1806688", + "retrieved_timestamp": "1762652580.18067", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "goulue5/merging_LLM", + "developer": "goulue5", + "inference_platform": "unknown", + "id": "goulue5/merging_LLM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32326006108237254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4216498611590102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43328125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29579454787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json b/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json new file mode 100644 index 000000000..5f7b03326 --- /dev/null +++ b/data/hfopenllm_v2/gradientai/Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gradientai_Llama-3-8B-Instruct-Gradient-1048k/1762652580.181334", + "retrieved_timestamp": "1762652580.181335", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", + "developer": "gradientai", + "inference_platform": "unknown", + "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4455588948434598 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345903107069573 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29404920212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json new file mode 100644 index 000000000..2f35b0662 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/1762652580.1827798", + "retrieved_timestamp": "1762652580.182781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42712447417297217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4961694535006833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40432291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3625332446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json new file mode 100644 index 000000000..0db18b7a1 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/1762652580.183053", + "retrieved_timestamp": "1762652580.183053", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6805897241541332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021734091176594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38851041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json b/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json new file mode 100644 index 000000000..581c422f9 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/1762652580.183267", + "retrieved_timestamp": "1762652580.183268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48695018107510296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510526564708187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40103125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3651097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json b/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json new file mode 100644 index 000000000..e03ecc497 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/1762652580.1834722", + "retrieved_timestamp": "1762652580.1834729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42500121898784116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286855891530357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4235104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json b/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json new file mode 100644 index 000000000..dd39331e5 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-12B/1762652580.184318", + "retrieved_timestamp": "1762652580.184319", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v2-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v2-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3506119318962575 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290279354217235 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41712499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3601230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json b/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json new file mode 100644 index 000000000..77fb9812b --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-12B/1762652580.184813", + "retrieved_timestamp": "1762652580.184814", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v3-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v3-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39649906692021614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5326669270363916 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json b/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json new file mode 100644 index 000000000..002dfe6ac --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v4-12B/1762652580.18525", + "retrieved_timestamp": "1762652580.185251", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v4-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v4-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34179421712168156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5430894084668724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42112499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3671875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json b/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json new file mode 100644 index 000000000..8b344aee1 --- /dev/null +++ b/data/hfopenllm_v2/grimjim/Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v5a-12B/1762652580.185457", + "retrieved_timestamp": "1762652580.185458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Magnolia-v5a-12B", + "developer": "grimjim", + "inference_platform": "unknown", + "id": "grimjim/Magnolia-v5a-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41136185321613317 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5311764105029141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746223564954682 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3601230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json b/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json new file mode 100644 index 000000000..815117888 --- /dev/null +++ b/data/hfopenllm_v2/gupta-tanish/llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gupta-tanish_llama-7b-dpo-baseline/1762652580.1871748", + "retrieved_timestamp": "1762652580.1871748", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gupta-tanish/llama-7b-dpo-baseline", + "developer": "gupta-tanish", + "inference_platform": "unknown", + "id": "gupta-tanish/llama-7b-dpo-baseline" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26930433472076315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3896894398264714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20279255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json b/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json new file mode 100644 index 000000000..5a85215d5 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube-1.8b-chat/1762652580.188648", + "retrieved_timestamp": "1762652580.188649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2oai/h2o-danube-1.8b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "id": "h2oai/h2o-danube-1.8b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2198699450790569 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3219657593234448 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13139960106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 1.831 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json new file mode 100644 index 000000000..7ceee720f --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-base/1762652580.18891", + "retrieved_timestamp": "1762652580.1889112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2oai/h2o-danube3-4b-base", + "developer": "h2oai", + "inference_platform": "unknown", + "id": "h2oai/h2o-danube3-4b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23380851695722904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3599083951265592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37781250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json new file mode 100644 index 000000000..9f9d53492 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-chat/1762652580.1891232", + "retrieved_timestamp": "1762652580.189124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2oai/h2o-danube3-4b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "id": "h2oai/h2o-danube3-4b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3628771659197596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3466170643135169 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22282247340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json b/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json new file mode 100644 index 000000000..71977428b --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-500m-chat/1762652580.1893299", + "retrieved_timestamp": "1762652580.1893299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2oai/h2o-danube3-500m-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "id": "h2oai/h2o-danube3-500m-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2207941594968018 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3034691168308313 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23070469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34339583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11436170212765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json b/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json new file mode 100644 index 000000000..ac268c5f7 --- /dev/null +++ b/data/hfopenllm_v2/h2oai/h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3.1-4b-chat/1762652580.189556", + "retrieved_timestamp": "1762652580.189557", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "h2oai/h2o-danube3.1-4b-chat", + "developer": "h2oai", + "inference_platform": "unknown", + "id": "h2oai/h2o-danube3.1-4b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021121734774842 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3608421638178268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2718583776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.962 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json b/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json new file mode 100644 index 000000000..14c091ead --- /dev/null +++ b/data/hfopenllm_v2/haoranxu/ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/haoranxu_ALMA-13B-R/1762652580.189782", + "retrieved_timestamp": "1762652580.189783", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "haoranxu/ALMA-13B-R", + "developer": "haoranxu", + "inference_platform": "unknown", + "id": "haoranxu/ALMA-13B-R" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.003921816336210145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.345656261205981 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35279166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18168218085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json new file mode 100644 index 000000000..c837d275d --- /dev/null +++ b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/1762652580.190052", + "retrieved_timestamp": "1762652580.190052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", + "developer": "haoranxu", + "inference_platform": "unknown", + "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7046447869430887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048301774821616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json new file mode 100644 index 000000000..d2caac9c6 --- /dev/null +++ b/data/hfopenllm_v2/haoranxu/Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-SimPO/1762652580.190277", + "retrieved_timestamp": "1762652580.1902778", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "haoranxu/Llama-3-Instruct-8B-SimPO", + "developer": "haoranxu", + "inference_platform": "unknown", + "id": "haoranxu/Llama-3-Instruct-8B-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7347449212533854 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49792360151415016 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35660416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37333776595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json b/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json new file mode 100644 index 000000000..8d3d3a6e3 --- /dev/null +++ b/data/hfopenllm_v2/hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/1762652580.190489", + "retrieved_timestamp": "1762652580.190489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", + "developer": "hatemmahmoud", + "inference_platform": "unknown", + "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41958004760701606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4269926809768501 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36097916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277593085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json new file mode 100644 index 000000000..1f28be60e --- /dev/null +++ b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v0.5/1762652580.190754", + "retrieved_timestamp": "1762652580.1907551", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hon9kon9ize/CantoneseLLMChat-v0.5", + "developer": "hon9kon9ize", + "inference_platform": "unknown", + "id": "hon9kon9ize/CantoneseLLMChat-v0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3230849701015528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43452388803059244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4706458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2504155585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.069 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json new file mode 100644 index 000000000..f24222406 --- /dev/null +++ b/data/hfopenllm_v2/hon9kon9ize/CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v1.0-7B/1762652580.191013", + "retrieved_timestamp": "1762652580.191013", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", + "developer": "hon9kon9ize", + "inference_platform": "unknown", + "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44548353923146145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4865734655539633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3784906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json b/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json new file mode 100644 index 000000000..041421a78 --- /dev/null +++ b/data/hfopenllm_v2/hongbai12/li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hongbai12_li-0.4-pre/1762652580.191224", + "retrieved_timestamp": "1762652580.191225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hongbai12/li-0.4-pre", + "developer": "hongbai12", + "inference_platform": "unknown", + "id": "hongbai12/li-0.4-pre" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199725616918665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6298274927108823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4513020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5014960106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json new file mode 100644 index 000000000..4fb45f91c --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp1-10B/1762652580.19171", + "retrieved_timestamp": "1762652580.191711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Falcon3Slerp1-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/Falcon3Slerp1-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5694069513335727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.616984966186231 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43176041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4401595744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json new file mode 100644 index 000000000..0e843e80c --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp2-10B/1762652580.191951", + "retrieved_timestamp": "1762652580.191952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Falcon3Slerp2-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/Falcon3Slerp2-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6117966994241945 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6164263500746402 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23187311178247735 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369182180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json new file mode 100644 index 000000000..fd6f6c77c --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp4-10B/1762652580.19215", + "retrieved_timestamp": "1762652580.192151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Falcon3Slerp4-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/Falcon3Slerp4-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6072254950198805 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.611433776236228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22885196374622357 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40175 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json new file mode 100644 index 000000000..cdc181d82 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp-3B/1762652580.19236", + "retrieved_timestamp": "1762652580.1923609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp-3B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5694568190179834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46239111387485293 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3989270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29679188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json new file mode 100644 index 000000000..29d7c9460 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp1-7B/1762652580.1925812", + "retrieved_timestamp": "1762652580.192582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp1-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394564200765082 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5354677787663963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23791540785498488 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44525 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4128989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json new file mode 100644 index 000000000..41eff7414 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp2-7B/1762652580.1928341", + "retrieved_timestamp": "1762652580.192835", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp2-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp2-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6160432097944565 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5537805428914538 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2983383685800604 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44788541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4140625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json new file mode 100644 index 000000000..beaf60bd2 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-10B/1762652580.1930392", + "retrieved_timestamp": "1762652580.19304", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp3-10B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp3-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6001564737119731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6060288025434474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4030833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4323470744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json new file mode 100644 index 000000000..7f506426d --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-7B/1762652580.193249", + "retrieved_timestamp": "1762652580.19325", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp3-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp3-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6096235765546527 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5532966528909408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45067708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41273271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json new file mode 100644 index 000000000..aeab40c12 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp4-7B/1762652580.193457", + "retrieved_timestamp": "1762652580.1934578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp4-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp4-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6284580468711907 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5523506352993854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4585208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031748670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json b/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json new file mode 100644 index 000000000..ae93e5f22 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp6-7B/1762652580.193665", + "retrieved_timestamp": "1762652580.193666", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/FalconSlerp6-7B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/FalconSlerp6-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6026542906155667 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383801786207648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44921875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39951795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json b/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json new file mode 100644 index 000000000..8c9234506 --- /dev/null +++ b/data/hfopenllm_v2/hotmailuser/RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_RombosBeagle-v2beta-MGS-32B/1762652580.199307", + "retrieved_timestamp": "1762652580.199308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/RombosBeagle-v2beta-MGS-32B", + "developer": "hotmailuser", + "inference_platform": "unknown", + "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5156761836371937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7037350002757341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49924471299093653 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5020833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5907579787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json new file mode 100644 index 000000000..247302ea6 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-7030/1762652580.2006452", + "retrieved_timestamp": "1762652580.200646", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/QwQ-32B-Coder-Fusion-7030", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38650779930584184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6177864730931621 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39222916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4367519946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json new file mode 100644 index 000000000..7849a5bed --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-8020/1762652580.200916", + "retrieved_timestamp": "1762652580.200917", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/QwQ-32B-Coder-Fusion-8020", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6020547702318737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6664531829718748 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42934374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367353723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json new file mode 100644 index 000000000..4e2cd9c36 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-9010/1762652580.201131", + "retrieved_timestamp": "1762652580.201132", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/QwQ-32B-Coder-Fusion-9010", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5778246164620984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6727405551499568 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5600066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json new file mode 100644 index 000000000..a4199f2de --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/1762652580.201351", + "retrieved_timestamp": "1762652580.201352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8327637335602867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6323822447052897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302114803625377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49617686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json new file mode 100644 index 000000000..c092ca338 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-72B-Instruct-abliterated/1762652580.2015731", + "retrieved_timestamp": "1762652580.2015731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8592667455684251 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7189881596250237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5536901595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json new file mode 100644 index 000000000..1a268fe24 --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/1762652580.201998", + "retrieved_timestamp": "1762652580.201998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7606484128778308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5376688442794247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42079454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json new file mode 100644 index 000000000..4d27c50fe --- /dev/null +++ b/data/hfopenllm_v2/huihui-ai/Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated/1762652580.201783", + "retrieved_timestamp": "1762652580.2017841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", + "developer": "huihui-ai", + "inference_platform": "unknown", + "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7546033413564897 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261589972829911 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45770392749244715 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39666666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41796875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json b/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json new file mode 100644 index 000000000..b87f1aa3e --- /dev/null +++ b/data/hfopenllm_v2/huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/1762652580.202209", + "retrieved_timestamp": "1762652580.20221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", + "developer": "huu-ontocord", + "inference_platform": "unknown", + "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15052726764983576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936618285636837 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11643949468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json b/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json new file mode 100644 index 000000000..a19cbb260 --- /dev/null +++ b/data/hfopenllm_v2/iRyanBell/ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1-II/1762652580.204559", + "retrieved_timestamp": "1762652580.204561", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iRyanBell/ARC1-II", + "developer": "iRyanBell", + "inference_platform": "unknown", + "id": "iRyanBell/ARC1-II" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17083560508340093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33817781029884353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4912916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1685505319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json b/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json new file mode 100644 index 000000000..891c3fc0a --- /dev/null +++ b/data/hfopenllm_v2/iRyanBell/ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1/1762652580.204204", + "retrieved_timestamp": "1762652580.204204", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iRyanBell/ARC1", + "developer": "iRyanBell", + "inference_platform": "unknown", + "id": "iRyanBell/ARC1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.441112913735555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4902999658144703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3990520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3371010638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json b/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json new file mode 100644 index 000000000..3e8c4d80b --- /dev/null +++ b/data/hfopenllm_v2/ibivibiv/colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibivibiv_colossus_120b/1762652580.2048829", + "retrieved_timestamp": "1762652580.204884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibivibiv/colossus_120b", + "developer": "ibivibiv", + "inference_platform": "unknown", + "id": "ibivibiv/colossus_120b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42759877126025614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6061408586494191 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4733125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3961103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 117.749 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json b/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json new file mode 100644 index 000000000..3db460923 --- /dev/null +++ b/data/hfopenllm_v2/ibivibiv/multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibivibiv_multimaster-7b-v6/1762652580.205187", + "retrieved_timestamp": "1762652580.205188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibivibiv/multimaster-7b-v6", + "developer": "ibivibiv", + "inference_platform": "unknown", + "id": "ibivibiv/multimaster-7b-v6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4473075883101283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519351871026721 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43957291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30950797872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 35.428 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json new file mode 100644 index 000000000..89c7caafd --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-base/1762652580.205958", + "retrieved_timestamp": "1762652580.20596", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-1b-a400m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-1b-a400m-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24040324117785256 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221205531032148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11519281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 1.335 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json new file mode 100644 index 000000000..c956d5079 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-instruct/1762652580.206321", + "retrieved_timestamp": "1762652580.206322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-1b-a400m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-1b-a400m-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33315159332792543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3223950988485842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36228124999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12441821808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 1.335 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json new file mode 100644 index 000000000..e299c3e5a --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-base/1762652580.206552", + "retrieved_timestamp": "1762652580.206552", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-2b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-2b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873821460391761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40474805593806223 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28020134228187926 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23811502659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.634 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json new file mode 100644 index 000000000..f557c6b8a --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-instruct/1762652580.206777", + "retrieved_timestamp": "1762652580.206777", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-2b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513977357854936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44119772062630297 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35148958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2814162234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.634 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json new file mode 100644 index 000000000..7b7e69638 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-base/1762652580.20698", + "retrieved_timestamp": "1762652580.20698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-3b-a800m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-3b-a800m-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2732261510569733 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36674974971308566 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18907912234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.374 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json new file mode 100644 index 000000000..baf223701 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-instruct/1762652580.2071838", + "retrieved_timestamp": "1762652580.2071848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-3b-a800m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-3b-a800m-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4298217618142085 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37527805291733446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21517619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.374 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json new file mode 100644 index 000000000..a7bfa635d --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-base/1762652580.207386", + "retrieved_timestamp": "1762652580.207386", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-8b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-8b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4583482936386566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4943760637365333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40813541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3312832446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json new file mode 100644 index 000000000..bb0842ef7 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-instruct/1762652580.207594", + "retrieved_timestamp": "1762652580.207595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.0-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.0-8b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309633993359841 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191874631840226 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1419939577039275 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34566156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json new file mode 100644 index 000000000..cebdea0f8 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-base/1762652580.207968", + "retrieved_timestamp": "1762652580.2079701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-1b-a400m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-1b-a400m-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2519437315212525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298699546506724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3500625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11394614361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 1.335 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json new file mode 100644 index 000000000..77a79c6d6 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-instruct/1762652580.208256", + "retrieved_timestamp": "1762652580.208257", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-1b-a400m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-1b-a400m-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46863987553025976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3279834385375178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12167553191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 1.335 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json new file mode 100644 index 000000000..4fc5f0a9a --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-base/1762652580.208491", + "retrieved_timestamp": "1762652580.208492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-2b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-2b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35216115462528313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047188028918873 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22506648936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json new file mode 100644 index 000000000..9f4cd39fb --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-instruct/1762652580.2087219", + "retrieved_timestamp": "1762652580.2087228", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-2b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.628557782240012 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44089858558056544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3605416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28191489361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json new file mode 100644 index 000000000..692dc704f --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-base/1762652580.20895", + "retrieved_timestamp": "1762652580.208951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-3b-a800m-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-3b-a800m-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2996294276962903 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.362822992347764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3275208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1792719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 3.299 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json new file mode 100644 index 000000000..a8036aa01 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-instruct/1762652580.2092001", + "retrieved_timestamp": "1762652580.2092009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-3b-a800m-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-3b-a800m-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516462984880118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4009494521947192 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21476063829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GraniteMoeForCausalLM", + "params_billions": 3.299 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json new file mode 100644 index 000000000..9b5e19d43 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-base/1762652580.209538", + "retrieved_timestamp": "1762652580.2095392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-8b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-8b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4221033524381973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4776956677111636 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3922291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3232214095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json new file mode 100644 index 000000000..8134ed056 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-instruct/1762652580.2098079", + "retrieved_timestamp": "1762652580.2098088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.1-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.1-8b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7207564816908026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364460433816018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47070833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3537234042553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json new file mode 100644 index 000000000..5bd18834a --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-2b-instruct/1762652580.2100549", + "retrieved_timestamp": "1762652580.2100558", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.2-2b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.2-2b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6151688630611223 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43872707491212865 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14425981873111782 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3645729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2783410904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 2.534 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json b/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json new file mode 100644 index 000000000..20fcb7de4 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-8b-instruct/1762652580.210291", + "retrieved_timestamp": "1762652580.2102919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-3.2-8b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-3.2-8b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274509412802475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5401759656246116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23791540785498488 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4561979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35123005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 8.171 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json b/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json new file mode 100644 index 000000000..5887bd6b9 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-base/1762652580.2106082", + "retrieved_timestamp": "1762652580.210609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-7b-base", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-7b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24142719096441884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34804372716106186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35548958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18342752659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json b/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json new file mode 100644 index 000000000..193bbcb40 --- /dev/null +++ b/data/hfopenllm_v2/ibm-granite/granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-instruct/1762652580.2108219", + "retrieved_timestamp": "1762652580.2108219", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm-granite/granite-7b-instruct", + "developer": "ibm-granite", + "inference_platform": "unknown", + "id": "ibm-granite/granite-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2972313461615181 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37229529603269523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40199999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2286402925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json b/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json new file mode 100644 index 000000000..932ff336f --- /dev/null +++ b/data/hfopenllm_v2/ibm/PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm_PowerLM-3b/1762652580.205445", + "retrieved_timestamp": "1762652580.205446", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm/PowerLM-3b", + "developer": "ibm", + "inference_platform": "unknown", + "id": "ibm/PowerLM-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33212764354135915 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679456724439114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3562916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20162898936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GraniteForCausalLM", + "params_billions": 3.512 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json b/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json new file mode 100644 index 000000000..da47a34a1 --- /dev/null +++ b/data/hfopenllm_v2/ibm/merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ibm_merlinite-7b/1762652580.2057128", + "retrieved_timestamp": "1762652580.205714", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ibm/merlinite-7b", + "developer": "ibm", + "inference_platform": "unknown", + "id": "ibm/merlinite-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2498703440205322 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50071326118705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44115624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3068484042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json b/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json new file mode 100644 index 000000000..58da457d8 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.15-02.10-RP/1762652580.211034", + "retrieved_timestamp": "1762652580.211034", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.15-02.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.15-02.10-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343355629729118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4976384736188401 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43197916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30659906914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json b/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json new file mode 100644 index 000000000..f41be5756 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.16-02.10-RP/1762652580.211284", + "retrieved_timestamp": "1762652580.211284", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.16-02.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.16-02.10-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069083365470286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4945564313654156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json b/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json new file mode 100644 index 000000000..5fcb99474 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.17-03.10-RP/1762652580.211494", + "retrieved_timestamp": "1762652580.211495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.17-03.10-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.17-03.10-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123538876846767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5006815748225494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30851063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json b/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json new file mode 100644 index 000000000..7512defe8 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.27-06.11-RP/1762652580.211702", + "retrieved_timestamp": "1762652580.211702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.27-06.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.27-06.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49182059158588104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5111654648230625 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43278125000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3154089095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json b/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json new file mode 100644 index 000000000..3443ccf9b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.29-06.11-RP/1762652580.2119", + "retrieved_timestamp": "1762652580.211901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.29-06.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.29-06.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.486050346414181 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5087880173407883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4458958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30925864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json b/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json new file mode 100644 index 000000000..57cc3da2c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.31-08.11-RP/1762652580.212094", + "retrieved_timestamp": "1762652580.212095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.31-08.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.31-08.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5145768782386291 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5032134100285419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3130817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json b/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json new file mode 100644 index 000000000..4c606bc49 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.32-10.11-RP/1762652580.2122939", + "retrieved_timestamp": "1762652580.2122948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.32-10.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.32-10.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49154576523623983 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5047695597611622 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4382083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json b/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json new file mode 100644 index 000000000..bbde5edbc --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34b-14.11-RP/1762652580.2124958", + "retrieved_timestamp": "1762652580.212497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.34b-14.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.34b-14.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47620868185303883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5067195329696937 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4419895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json b/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json new file mode 100644 index 000000000..8b2f923f2 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34n-14.11-RP/1762652580.2127092", + "retrieved_timestamp": "1762652580.21271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.34n-14.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.34n-14.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47865663107222167 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5091090160356474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4379583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31241688829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json b/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json new file mode 100644 index 000000000..e6fe45dfb --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.37-18.11-RP/1762652580.212915", + "retrieved_timestamp": "1762652580.212916", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.37-18.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.37-18.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4972162750391184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084310833712639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43392708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3143284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json b/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json new file mode 100644 index 000000000..22807c6c2 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.38-19.11-RP/1762652580.213116", + "retrieved_timestamp": "1762652580.213117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.38-19.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.38-19.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44033830237104216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510108216407024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43671875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31399601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json b/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json new file mode 100644 index 000000000..db9ea9a53 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.39-19.11-RP/1762652580.2133162", + "retrieved_timestamp": "1762652580.2133162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.39-19.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.39-19.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47565902915375646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092985137525424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4341458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3126662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json b/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json new file mode 100644 index 000000000..50573a0e7 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.40-20.11-RP/1762652580.2136111", + "retrieved_timestamp": "1762652580.213614", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.40-20.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.40-20.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4762585495374495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.509308586549064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44459374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30992353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json b/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json new file mode 100644 index 000000000..529cd444b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.41-22.11-RP/1762652580.213999", + "retrieved_timestamp": "1762652580.2140002", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.41-22.11-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.41-22.11-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620451513096362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4723318624775949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45597916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26180186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json b/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json new file mode 100644 index 000000000..e7980420a --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50-16.01-RP/1762652580.214273", + "retrieved_timestamp": "1762652580.214274", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.50-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.50-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43848987353555235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49804682910006176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30693151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json b/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json new file mode 100644 index 000000000..9e2a1d58b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50.1-16.01-RP/1762652580.214615", + "retrieved_timestamp": "1762652580.214617", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.50.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.50.1-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4829031414424837 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5107472937598788 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43274999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3132480053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json b/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json new file mode 100644 index 000000000..3e4e54936 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51-16.01-RP/1762652580.214901", + "retrieved_timestamp": "1762652580.214902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.51-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.51-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4430610779398662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5044464794803141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44366666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30601728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json b/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json new file mode 100644 index 000000000..f9bab5690 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51.1-16.01-RP/1762652580.215148", + "retrieved_timestamp": "1762652580.2151492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.51.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.51.1-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4573243438520902 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5121083021452105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43938541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json b/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json new file mode 100644 index 000000000..6a1fed871 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52-16.01-RP/1762652580.21541", + "retrieved_timestamp": "1762652580.215412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.52-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.52-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503051902285935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504677500406742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43960416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3080119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json b/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json new file mode 100644 index 000000000..fdaa843bd --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52.1-16.01-RP/1762652580.21567", + "retrieved_timestamp": "1762652580.215671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.52.1-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.52.1-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45492626231731803 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510648341878344 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43938541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31050531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json b/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json new file mode 100644 index 000000000..01c39db1b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.53-16.01-RP/1762652580.215963", + "retrieved_timestamp": "1762652580.2159638", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.53-16.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.53-16.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4741352943523185 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5101675133484068 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43274999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31299867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json b/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json new file mode 100644 index 000000000..adbd2bb7c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.54-17.01-RP/1762652580.2162719", + "retrieved_timestamp": "1762652580.2162728", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.54-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.54-17.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4378903531518593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4853448809638454 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48741666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23262965425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json b/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json new file mode 100644 index 000000000..b74fe190b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.55-17.01-RP/1762652580.2165911", + "retrieved_timestamp": "1762652580.2165918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.55-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.55-17.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.496067101956143 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5076567509425027 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2657912234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json b/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json new file mode 100644 index 000000000..309d5e462 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.57-17.01-RP/1762652580.216822", + "retrieved_timestamp": "1762652580.216822", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.57-17.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.57-17.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5151763986223221 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064080420224116 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46859375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26512632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json b/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json new file mode 100644 index 000000000..08d147028 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60-18.01-RP/1762652580.217043", + "retrieved_timestamp": "1762652580.2170439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.60-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.60-18.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374329002601985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5093724614980669 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46704166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28366023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json b/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json new file mode 100644 index 000000000..8e0e391d2 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60.1-18.01-RP/1762652580.217258", + "retrieved_timestamp": "1762652580.217259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.60.1-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.60.1-18.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5187735209244804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5119675522804026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4497708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2913896276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json b/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json new file mode 100644 index 000000000..e4e55fdfe --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.61-18.01-RP/1762652580.2174668", + "retrieved_timestamp": "1762652580.2174678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.61-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.61-18.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5441273598496433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5104839613346842 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27086103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json b/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json new file mode 100644 index 000000000..c786d5edb --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62-18.01-RP/1762652580.21767", + "retrieved_timestamp": "1762652580.217671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.62-18.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.62-18.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536733644507684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103327208197285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4537708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28773271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json b/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json new file mode 100644 index 000000000..60bbbb401 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62.1-24.01-RP/1762652580.2178729", + "retrieved_timestamp": "1762652580.2178729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.62.1-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.62.1-24.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181740005407873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5108967760246949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45510416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28706781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json b/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json new file mode 100644 index 000000000..2cb41ea4f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64-24.01-RP/1762652580.218076", + "retrieved_timestamp": "1762652580.218076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.64-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.64-24.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5440774921652327 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5059610114856247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29330119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json b/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json new file mode 100644 index 000000000..519815ceb --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64.1-24.01-RP/1762652580.218272", + "retrieved_timestamp": "1762652580.218272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.64.1-24.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.64.1-24.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446770125489258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5059610114856247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29330119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json b/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json new file mode 100644 index 000000000..141d3c066 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.65-25.01-RP/1762652580.2184708", + "retrieved_timestamp": "1762652580.218472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.65-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.65-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5029366525264077 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5095976254774931 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4339583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29970079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json b/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json new file mode 100644 index 000000000..d86101648 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.66-25.01-RP/1762652580.2186701", + "retrieved_timestamp": "1762652580.2186701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.66-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.66-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.532487134137422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128983540188711 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44344791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3039394946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json b/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json new file mode 100644 index 000000000..a22ee2bcd --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.67-25.01-RP/1762652580.21887", + "retrieved_timestamp": "1762652580.218871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.67-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.67-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536134124123991 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5112894150790012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42788541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30967420212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json b/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json new file mode 100644 index 000000000..cc1cba248 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.68-25.01-RP/1762652580.2190669", + "retrieved_timestamp": "1762652580.2190678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.68-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.68-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5513714721383707 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5130058094823416 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44456249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json b/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json new file mode 100644 index 000000000..a8a3db138 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.69-25.01-RP/1762652580.219263", + "retrieved_timestamp": "1762652580.219264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.69-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.69-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5437527981311808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097683665599672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4485625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29654255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json b/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json new file mode 100644 index 000000000..f38ebd7de --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.7-29.09-RP/1762652580.2194638", + "retrieved_timestamp": "1762652580.219465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.7-29.09-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.7-29.09-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5175744801570943 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5047661992357916 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4237916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3126662234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json b/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json new file mode 100644 index 000000000..19251c603 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70-25.01-RP/1762652580.2196732", + "retrieved_timestamp": "1762652580.219674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.70-25.01-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.70-25.01-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.549797869652522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513632436415875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45119791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2996176861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json b/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json new file mode 100644 index 000000000..a2c0cc198 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70.1-01.02-RP/1762652580.219877", + "retrieved_timestamp": "1762652580.219877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.70.1-01.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.70.1-01.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069582042314393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5059798926804829 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2748503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json b/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json new file mode 100644 index 000000000..479feec80 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.73-01.02-RP/1762652580.220075", + "retrieved_timestamp": "1762652580.220076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.73-01.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.73-01.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.529164838184905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103425890792322 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46639583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27019614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json b/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json new file mode 100644 index 000000000..c335ab5ec --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.74-02.02-RP/1762652580.220269", + "retrieved_timestamp": "1762652580.2202702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.74-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.74-02.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2935344884905384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4646134965075064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42804166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21434507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json b/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json new file mode 100644 index 000000000..1328b8b45 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.76-02.02-RP/1762652580.220735", + "retrieved_timestamp": "1762652580.220737", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.76-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.76-02.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45290274250100837 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5085610407875073 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43616666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2652094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json b/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json new file mode 100644 index 000000000..336e66630 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.77-02.02-RP/1762652580.221007", + "retrieved_timestamp": "1762652580.2210078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.77-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.77-02.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309633993359841 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5109257300160749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4765 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29986702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json b/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json new file mode 100644 index 000000000..284dbd106 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.78-02.02-RP/1762652580.221266", + "retrieved_timestamp": "1762652580.221267", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.78-02.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.78-02.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405292401937969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5002126961381052 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.468625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2954621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json b/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json new file mode 100644 index 000000000..093263aaf --- /dev/null +++ b/data/hfopenllm_v2/icefog72/Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_Ice0.80-03.02-RP/1762652580.2214909", + "retrieved_timestamp": "1762652580.221492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/Ice0.80-03.02-RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/Ice0.80-03.02-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516462984880118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097962218679292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4923125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2912234042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json b/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json new file mode 100644 index 000000000..251f6ece1 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceCocoaRP-7b/1762652580.2217228", + "retrieved_timestamp": "1762652580.2217238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceCocoaRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceCocoaRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962421929369628 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4937902147076245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3098404255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json b/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json new file mode 100644 index 000000000..57290145a --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceCoffeeRP-7b/1762652580.2220101", + "retrieved_timestamp": "1762652580.2220109", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceCoffeeRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceCoffeeRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4959174989029109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48887216244327214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4159791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2974567819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json b/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json new file mode 100644 index 000000000..ce1ec6f1c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkByFrankensteinV3RP/1762652580.222236", + "retrieved_timestamp": "1762652580.222236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceDrinkByFrankensteinV3RP", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceDrinkByFrankensteinV3RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4974911013887596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4832523723413275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4253125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json b/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json new file mode 100644 index 000000000..79ae461d0 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/1762652580.2224698", + "retrieved_timestamp": "1762652580.2224698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49684171332065585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46578646938927254 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2816655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json b/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json new file mode 100644 index 000000000..94fa3521b --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/1762652580.2227032", + "retrieved_timestamp": "1762652580.2227042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5130032757527804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.502625425089929 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3064328457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json b/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json new file mode 100644 index 000000000..920bb7849 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkCherryRP-7b/1762652580.222923", + "retrieved_timestamp": "1762652580.222924", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceDrunkCherryRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceDrunkCherryRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48982255969715904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4846629039263151 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4291875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3009474734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json b/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json new file mode 100644 index 000000000..9ac74b51a --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkenCherryRP-7b/1762652580.223197", + "retrieved_timestamp": "1762652580.223207", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceDrunkenCherryRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceDrunkenCherryRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4762585495374495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.509308586549064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44459374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30992353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json b/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json new file mode 100644 index 000000000..5deea65a5 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceEspressoRPv2-7b/1762652580.223459", + "retrieved_timestamp": "1762652580.2234602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceEspressoRPv2-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceEspressoRPv2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4977160600539901 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5054890156350785 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43306249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3061003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json b/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json new file mode 100644 index 000000000..cfa6c21f9 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceLemonTeaRP-32k-7b/1762652580.2236779", + "retrieved_timestamp": "1762652580.223679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceLemonTeaRP-32k-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceLemonTeaRP-32k-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5212214701436633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49973852418379305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067652925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json b/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json new file mode 100644 index 000000000..78ea25360 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceMartiniRP-7b/1762652580.223922", + "retrieved_timestamp": "1762652580.223923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceMartiniRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceMartiniRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5044603873278457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4972421837639585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4344895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3073470744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json b/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json new file mode 100644 index 000000000..41d711c92 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceNalyvkaRP-7b/1762652580.224114", + "retrieved_timestamp": "1762652580.224115", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceNalyvkaRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceNalyvkaRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.549797869652522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513632436415875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45119791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2996176861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json b/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json new file mode 100644 index 000000000..fc300ec4c --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeRP-7b/1762652580.2243059", + "retrieved_timestamp": "1762652580.224307", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceSakeRP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceSakeRP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5227950726295119 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5119287057484642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41300000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3176529255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json b/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json new file mode 100644 index 000000000..9d46f9a0d --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV4RP-7b/1762652580.224551", + "retrieved_timestamp": "1762652580.224552", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceSakeV4RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceSakeV4RP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4634192830578421 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4929557826908731 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40819791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31025598404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json b/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json new file mode 100644 index 000000000..97d193f32 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV6RP-7b/1762652580.224776", + "retrieved_timestamp": "1762652580.224777", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceSakeV6RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceSakeV6RP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5032613465604596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49760336362566354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42001041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3093417553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json b/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json new file mode 100644 index 000000000..af0564a96 --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV8RP-7b/1762652580.2249868", + "retrieved_timestamp": "1762652580.224988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceSakeV8RP-7b", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceSakeV8RP-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6085741388404988 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48847141337960176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3992708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.301030585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json new file mode 100644 index 000000000..45a9602ad --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/1762652580.2254012", + "retrieved_timestamp": "1762652580.225402", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48709978412833504 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399660013109026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39641666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24983377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json new file mode 100644 index 000000000..87712986f --- /dev/null +++ b/data/hfopenllm_v2/icefog72/IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/1762652580.225198", + "retrieved_timestamp": "1762652580.2251992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", + "developer": "icefog72", + "inference_platform": "unknown", + "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5263423272472595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5019587584232624 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30560172872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json b/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json new file mode 100644 index 000000000..f8bc45a98 --- /dev/null +++ b/data/hfopenllm_v2/ilsp/Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ilsp_Llama-Krikri-8B-Instruct/1762652580.225861", + "retrieved_timestamp": "1762652580.225861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ilsp/Llama-Krikri-8B-Instruct", + "developer": "ilsp", + "inference_platform": "unknown", + "id": "ilsp/Llama-Krikri-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6078748830879843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504664191645287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4079791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3312832446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.202 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json b/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json new file mode 100644 index 000000000..c8d7e5e82 --- /dev/null +++ b/data/hfopenllm_v2/inflatebot/MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/inflatebot_MN-12B-Mag-Mell-R1/1762652580.2261078", + "retrieved_timestamp": "1762652580.226109", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "inflatebot/MN-12B-Mag-Mell-R1", + "developer": "inflatebot", + "inference_platform": "unknown", + "id": "inflatebot/MN-12B-Mag-Mell-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46129602787271107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5303854975434981 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40022916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34383311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json b/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json new file mode 100644 index 000000000..5cec04200 --- /dev/null +++ b/data/hfopenllm_v2/informatiker/Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/informatiker_Qwen2-7B-Instruct-abliterated/1762652580.2263439", + "retrieved_timestamp": "1762652580.226345", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "informatiker/Qwen2-7B-Instruct-abliterated", + "developer": "informatiker", + "inference_platform": "unknown", + "id": "informatiker/Qwen2-7B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5821708622011817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5534265515936739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.263595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38879166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json b/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json new file mode 100644 index 000000000..86498f734 --- /dev/null +++ b/data/hfopenllm_v2/insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/1762652580.226581", + "retrieved_timestamp": "1762652580.226582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", + "developer": "insightfactory", + "inference_platform": "unknown", + "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45884807865352817 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4146016381618061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.349875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2960438829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "", + "params_billions": 1.933 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json b/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json new file mode 100644 index 000000000..082ae4f7d --- /dev/null +++ b/data/hfopenllm_v2/instruction-pretrain/InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/instruction-pretrain_InstructLM-500M/1762652580.226826", + "retrieved_timestamp": "1762652580.226826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "instruction-pretrain/InstructLM-500M", + "developer": "instruction-pretrain", + "inference_platform": "unknown", + "id": "instruction-pretrain/InstructLM-500M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027662158627996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29408717872529677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1141123670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json b/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json new file mode 100644 index 000000000..ed4541f3e --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-1_8b/1762652580.227062", + "retrieved_timestamp": "1762652580.227063", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2-1_8b", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2-1_8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2197702097102355 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3879732800028095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38128125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15882646276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json b/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json new file mode 100644 index 000000000..7ff338290 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-7b/1762652580.2273018", + "retrieved_timestamp": "1762652580.227303", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2-7b", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22803680981595092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5825 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08571428571428572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33666666666666667 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43999999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json b/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json new file mode 100644 index 000000000..e0c3df475 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2-chat-1_8b/1762652580.227562", + "retrieved_timestamp": "1762652580.227563", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2-chat-1_8b", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2-chat-1_8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2386545477111841 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4452271664119214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36305208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18392619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 1.889 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json b/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json new file mode 100644 index 000000000..2c03343fc --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-1_8b-chat/1762652580.227762", + "retrieved_timestamp": "1762652580.227763", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2_5-1_8b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2_5-1_8b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38490870889240547 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4488926786996439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332327 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35939583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12990359042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 1.89 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json b/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json new file mode 100644 index 000000000..e2b476f9e --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-20b-chat/1762652580.2279649", + "retrieved_timestamp": "1762652580.227966", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2_5-20b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2_5-20b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7009977969565198 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7473580533672884 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4558229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39976728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 19.86 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json b/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json new file mode 100644 index 000000000..202718af0 --- /dev/null +++ b/data/hfopenllm_v2/internlm/internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-7b-chat/1762652580.2281651", + "retrieved_timestamp": "1762652580.2281659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "internlm/internlm2_5-7b-chat", + "developer": "internlm", + "inference_platform": "unknown", + "id": "internlm/internlm2_5-7b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5538692890419642 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7073179916851792 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25302114803625375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45938541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "InternLM2ForCausalLM", + "params_billions": 7.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json b/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json new file mode 100644 index 000000000..b5b47b807 --- /dev/null +++ b/data/hfopenllm_v2/intervitens/mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/intervitens_mini-magnum-12b-v1.1/1762652580.228364", + "retrieved_timestamp": "1762652580.228365", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "intervitens/mini-magnum-12b-v1.1", + "developer": "intervitens", + "inference_platform": "unknown", + "id": "intervitens/mini-magnum-12b-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155509603407846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.506180035650624 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4004479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3291223404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json b/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json new file mode 100644 index 000000000..2a1e6577b --- /dev/null +++ b/data/hfopenllm_v2/inumulaisk/eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/inumulaisk_eval_model/1762652580.228598", + "retrieved_timestamp": "1762652580.228599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "inumulaisk/eval_model", + "developer": "inumulaisk", + "inference_platform": "unknown", + "id": "inumulaisk/eval_model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19314197440568803 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35118774303346373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.297583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16638962765957446 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json b/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json new file mode 100644 index 000000000..91f345b2a --- /dev/null +++ b/data/hfopenllm_v2/invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/1762652580.229043", + "retrieved_timestamp": "1762652580.229047", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", + "developer": "invalid-coder", + "inference_platform": "unknown", + "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45547591501660034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5158439010792586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3992395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145777925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json new file mode 100644 index 000000000..3fdad3af2 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.2-8B/1762652580.229454", + "retrieved_timestamp": "1762652580.229455", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "invisietch/EtherealRainbow-v0.2-8B", + "developer": "invisietch", + "inference_platform": "unknown", + "id": "invisietch/EtherealRainbow-v0.2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39032988027323057 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5102035205059678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36527593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json new file mode 100644 index 000000000..b22061cd3 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.3-8B/1762652580.229776", + "retrieved_timestamp": "1762652580.2297769", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "invisietch/EtherealRainbow-v0.3-8B", + "developer": "invisietch", + "inference_platform": "unknown", + "id": "invisietch/EtherealRainbow-v0.3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36822298168858625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5096758454539693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39039583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36261635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json b/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json new file mode 100644 index 000000000..483140058 --- /dev/null +++ b/data/hfopenllm_v2/invisietch/MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/invisietch_MiS-Firefly-v0.2-22B/1762652580.2300959", + "retrieved_timestamp": "1762652580.2300968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "invisietch/MiS-Firefly-v0.2-22B", + "developer": "invisietch", + "inference_platform": "unknown", + "id": "invisietch/MiS-Firefly-v0.2-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5371082062261466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5513523591170696 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46937500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3620345744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json b/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json new file mode 100644 index 000000000..1a01518cd --- /dev/null +++ b/data/hfopenllm_v2/invisietch/Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/invisietch_Nimbus-Miqu-v0.1-70B/1762652580.230321", + "retrieved_timestamp": "1762652580.230322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "invisietch/Nimbus-Miqu-v0.1-70B", + "developer": "invisietch", + "inference_platform": "unknown", + "id": "invisietch/Nimbus-Miqu-v0.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46466819150963884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.601030667794844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41331249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3853058510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json b/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json new file mode 100644 index 000000000..974070cb2 --- /dev/null +++ b/data/hfopenllm_v2/jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/1762652580.230787", + "retrieved_timestamp": "1762652580.230787", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", + "developer": "jaredjoss", + "inference_platform": "unknown", + "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15722172723928066 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2863444769655102 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11685505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 0.407 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json new file mode 100644 index 000000000..34edb7307 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2-8B/1762652580.231028", + "retrieved_timestamp": "1762652580.231029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4778077722664752 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5447163557182707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38580452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json new file mode 100644 index 000000000..50437ac8d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/1762652580.231263", + "retrieved_timestamp": "1762652580.231264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4665919759571271 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5444200006474947 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.382563164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json new file mode 100644 index 000000000..b3536b2f4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/1762652580.231466", + "retrieved_timestamp": "1762652580.231467", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267997801389203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431077158331955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37982047872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json new file mode 100644 index 000000000..353deb671 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/1762652580.231667", + "retrieved_timestamp": "1762652580.231667", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42712447417297217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5440818233123913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4277916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37840757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json new file mode 100644 index 000000000..e06375e9d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Aurora_faustus-8B/1762652580.231864", + "retrieved_timestamp": "1762652580.2318652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-Aurora_faustus-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-Aurora_faustus-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.443236168920686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5260325661068855 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4116979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38131648936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json new file mode 100644 index 000000000..451406eea --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-8B/1762652580.232065", + "retrieved_timestamp": "1762652580.232065", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4404635256674513 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5311831227740652 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4236666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3818151595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json new file mode 100644 index 000000000..edddc0881 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/1762652580.232267", + "retrieved_timestamp": "1762652580.232268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43779061778303796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5189720817259138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4236354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900432180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json new file mode 100644 index 000000000..6a72dd8cc --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-v38-8B/1762652580.2324722", + "retrieved_timestamp": "1762652580.2324731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4355676272290855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5229513322616746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42115624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3890458776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json new file mode 100644 index 000000000..cc6f930e4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.2328691", + "retrieved_timestamp": "1762652580.2328691", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-Fusion-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43446832183052075 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5419028777027763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38538896276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json new file mode 100644 index 000000000..50b56b25b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.23267", + "retrieved_timestamp": "1762652580.232671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-Fusion-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4417623018036587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405890148943007 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1351963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859707446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json new file mode 100644 index 000000000..20fe2b34b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-8B/1762652580.233048", + "retrieved_timestamp": "1762652580.2330492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34052092891306174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195634214282913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4301145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json new file mode 100644 index 000000000..542c718bb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-light-8B/1762652580.233289", + "retrieved_timestamp": "1762652580.23329", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38238651223198894 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5271029575696119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3781582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json new file mode 100644 index 000000000..0921bdf14 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v23-8B/1762652580.233495", + "retrieved_timestamp": "1762652580.233495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040933611705829 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5289840558524612 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43684375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37059507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json new file mode 100644 index 000000000..0ee959c8c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v24-8B/1762652580.233697", + "retrieved_timestamp": "1762652580.2336981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42587556572117535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5276140433113651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3779089095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json new file mode 100644 index 000000000..7b383aefc --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v25-8B/1762652580.23391", + "retrieved_timestamp": "1762652580.23391", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4420869958377106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290702582598797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37159242021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json new file mode 100644 index 000000000..9302dea4d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v26-8B/1762652580.234126", + "retrieved_timestamp": "1762652580.234127", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4413877400851962 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5271171047819411 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793218085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json new file mode 100644 index 000000000..337938bd6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v27-8B/1762652580.2343428", + "retrieved_timestamp": "1762652580.234344", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4378404854674486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290320010579407 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37549867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json new file mode 100644 index 000000000..3631eec91 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v28-8B/1762652580.234553", + "retrieved_timestamp": "1762652580.234553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43659157701565177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294743678489208 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43296874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json new file mode 100644 index 000000000..df7c8e8b8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v29-8B/1762652580.234771", + "retrieved_timestamp": "1762652580.234771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4487315877427448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5275189525290296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42366666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37649601063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json new file mode 100644 index 000000000..6a5ad8efc --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v30-8B/1762652580.2349901", + "retrieved_timestamp": "1762652580.234991", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42947268802333366 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327819889174134 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3937832446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json new file mode 100644 index 000000000..51f26550d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v31-8B/1762652580.235214", + "retrieved_timestamp": "1762652580.235214", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43986400528375824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5315048053167004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39345079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json new file mode 100644 index 000000000..a49b52f28 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v32-8B/1762652580.235436", + "retrieved_timestamp": "1762652580.2354372", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4487315877427448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5292530349260334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42106249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776595744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json new file mode 100644 index 000000000..cb329fb5c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v33-8B/1762652580.23565", + "retrieved_timestamp": "1762652580.235651", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4301719437758481 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5321153222507468 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41839583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390874335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json new file mode 100644 index 000000000..e40167abe --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v34-8B/1762652580.235871", + "retrieved_timestamp": "1762652580.235871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45625052638111324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.533301459442271 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42372916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3927027925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json new file mode 100644 index 000000000..5ae868926 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-8B/1762652580.236081", + "retrieved_timestamp": "1762652580.2360818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47213726246359655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5176546480934434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43290625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3816489361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json new file mode 100644 index 000000000..f0271dcdb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-light-8B/1762652580.236298", + "retrieved_timestamp": "1762652580.236299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46849027247702757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235021286391058 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42893749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38056848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json new file mode 100644 index 000000000..aaf716631 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v19-8B/1762652580.236516", + "retrieved_timestamp": "1762652580.2365172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4563502617499346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316458785173577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37898936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json new file mode 100644 index 000000000..c2be55d37 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v20-8B/1762652580.236737", + "retrieved_timestamp": "1762652580.236737", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4423119545029411 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250468078369915 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42103124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39361702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json new file mode 100644 index 000000000..da78a06b0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v21-8B/1762652580.2369542", + "retrieved_timestamp": "1762652580.236955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46701640536000033 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.524796520922724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43427083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3816489361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json new file mode 100644 index 000000000..96e1a4ea2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v22-8B/1762652580.2371762", + "retrieved_timestamp": "1762652580.2371771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4673410993940522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5245863682593667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38115026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json new file mode 100644 index 000000000..b5033e3b0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-8B/1762652580.237391", + "retrieved_timestamp": "1762652580.237392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45722460848326885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5321936191858193 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4305833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39012632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json new file mode 100644 index 000000000..a6c082b08 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-alt-8B/1762652580.23761", + "retrieved_timestamp": "1762652580.23761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4542270065648036 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5297928701221488 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42921875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3896276595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json new file mode 100644 index 000000000..095e33504 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-8B/1762652580.237838", + "retrieved_timestamp": "1762652580.2378392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45809895521660304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5376138387743472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42909375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.394281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json new file mode 100644 index 000000000..14284eed2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/1762652580.238084", + "retrieved_timestamp": "1762652580.238085", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44535942410581697 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327145731870764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43045833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39228723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json new file mode 100644 index 000000000..fc4963cae --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1762652580.238316", + "retrieved_timestamp": "1762652580.238317", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4563003940655239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316344937208096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4196979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json new file mode 100644 index 000000000..e6ad25d6f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1762652580.238605", + "retrieved_timestamp": "1762652580.238605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44286160720222345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359422335881335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42776041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3929521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json new file mode 100644 index 000000000..938a402fd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v14-8B/1762652580.23884", + "retrieved_timestamp": "1762652580.23884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380155764482684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363063034440413 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42772916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3931183510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json new file mode 100644 index 000000000..1578c90b0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v15-8B/1762652580.239064", + "retrieved_timestamp": "1762652580.2390652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4654428028741517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.534326872652317 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42772916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3941156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json new file mode 100644 index 000000000..f7e8b8180 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v16-8B/1762652580.2392871", + "retrieved_timestamp": "1762652580.239288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4556510059974202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343925058514598 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4264270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39170545212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json new file mode 100644 index 000000000..d7edd8bbb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v17-8B/1762652580.239734", + "retrieved_timestamp": "1762652580.239739", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4462337708391512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5346666279815969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42906249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39228723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json new file mode 100644 index 000000000..0455e03c5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v18-8B/1762652580.240138", + "retrieved_timestamp": "1762652580.240139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43409376011205825 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5339179190615058 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3904587765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json new file mode 100644 index 000000000..d25d470f1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/1762652580.2404292", + "retrieved_timestamp": "1762652580.24043", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44078821970150317 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5214884907801955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3887965425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json new file mode 100644 index 000000000..f9fd41248 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v10-8B/1762652580.2406652", + "retrieved_timestamp": "1762652580.2406662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v10-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v10-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4261503920708165 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375875314179012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38314494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json new file mode 100644 index 000000000..af34a68ea --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v11-8B/1762652580.240909", + "retrieved_timestamp": "1762652580.24091", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v11-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v11-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44263664853699297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359208647512345 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3835605053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json new file mode 100644 index 000000000..53dba0181 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v12-8B/1762652580.2411451", + "retrieved_timestamp": "1762652580.241146", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v12-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v12-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43779061778303796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5348808250181011 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42106249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3835605053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json new file mode 100644 index 000000000..c024ad49a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v2-8B/1762652580.241379", + "retrieved_timestamp": "1762652580.2413802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395891789341171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5341160060985229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42106249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3826462765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json new file mode 100644 index 000000000..f726072b8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v3-8B/1762652580.241601", + "retrieved_timestamp": "1762652580.241602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4410630460511443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330987974156178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38214760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json new file mode 100644 index 000000000..d52230f9c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v4-8B/1762652580.241815", + "retrieved_timestamp": "1762652580.241816", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v4-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v4-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4289230353240513 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5336560458316563 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41972916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38173204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json new file mode 100644 index 000000000..70cffc619 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v5-8B/1762652580.2420359", + "retrieved_timestamp": "1762652580.2420359", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v5-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v5-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44595894448951 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5344958011609363 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3820644946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json new file mode 100644 index 000000000..f21fb54a4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v6-8B/1762652580.242274", + "retrieved_timestamp": "1762652580.242275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v6-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v6-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395891789341171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5379609044843678 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3820644946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json new file mode 100644 index 000000000..57bee684b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v7-8B/1762652580.242492", + "retrieved_timestamp": "1762652580.242493", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v7-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v7-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276741268722545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5334882804815716 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41709375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3835605053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json new file mode 100644 index 000000000..3757ac2f7 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v8-8B/1762652580.242712", + "retrieved_timestamp": "1762652580.242713", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v8-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v8-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43834027048232027 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359208647512345 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42103124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38272938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json new file mode 100644 index 000000000..9890fd64a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-8B/1762652580.242934", + "retrieved_timestamp": "1762652580.242935", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v9-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v9-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43686640336529303 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5360680608930435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3819813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json new file mode 100644 index 000000000..34b1d850b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/1762652580.243146", + "retrieved_timestamp": "1762652580.243147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.428373382624769 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5539931244833417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43544791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3836436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json new file mode 100644 index 000000000..e45486c44 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-8b/1762652580.243371", + "retrieved_timestamp": "1762652580.243371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-Elusive-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-Elusive-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41688275996577967 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338593917060857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3759973404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json new file mode 100644 index 000000000..c9dad4af5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-8B/1762652580.243592", + "retrieved_timestamp": "1762652580.243593", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-Elusive-VENN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-Elusive-VENN-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232525255211727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5355598563659026 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3797373670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json new file mode 100644 index 000000000..3e6d0d1e2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/1762652580.243807", + "retrieved_timestamp": "1762652580.243807", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4541771388803929 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5312976840812583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json new file mode 100644 index 000000000..3fef12559 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/1762652580.244045", + "retrieved_timestamp": "1762652580.244046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4335441074127758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5303980337010061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794880319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json b/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json new file mode 100644 index 000000000..b3b63f9ae --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-VENN-8B/1762652580.24428", + "retrieved_timestamp": "1762652580.244281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/Kosmos-VENN-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/Kosmos-VENN-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433219413378724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317923607687299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42109375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800698138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json new file mode 100644 index 000000000..3b5059a71 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-8B/1762652580.244709", + "retrieved_timestamp": "1762652580.24471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/PRP-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/PRP-Kosmos-EVAA-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36327721556580983 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5237421324582278 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json new file mode 100644 index 000000000..f8c0c2bff --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-light-8B/1762652580.2449658", + "retrieved_timestamp": "1762652580.244967", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/PRP-Kosmos-EVAA-light-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4321201079801593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274582578494339 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4235416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3631150265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json new file mode 100644 index 000000000..757adba05 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-8B/1762652580.2451851", + "retrieved_timestamp": "1762652580.245186", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/TSN-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/TSN-Kosmos-EVAA-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49032234471203073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347376087743225 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4173125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json new file mode 100644 index 000000000..93618df89 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-v2-8B/1762652580.2454138", + "retrieved_timestamp": "1762652580.245415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46669171132594844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.534342097284994 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41864583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3762466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json b/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json new file mode 100644 index 000000000..dd316fe9d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-1/1762652580.2456498", + "retrieved_timestamp": "1762652580.245653", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4864005283758206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5375556962119087 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41706250000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38971077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json b/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json new file mode 100644 index 000000000..786bf64e9 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-2/1762652580.2460952", + "retrieved_timestamp": "1762652580.2460968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077403511571519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5066789926627318 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.363530585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json b/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json new file mode 100644 index 000000000..d2f776699 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-3/1762652580.24635", + "retrieved_timestamp": "1762652580.246351", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.416832892281369 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5157831821186084 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1404833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4264895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38563829787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json b/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json new file mode 100644 index 000000000..807a6673f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-4/1762652580.2465842", + "retrieved_timestamp": "1762652580.2465851", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-4", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47675833455232114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.52115051798211 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40924999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3773271276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json b/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json new file mode 100644 index 000000000..7245abd9d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-5/1762652580.2468202", + "retrieved_timestamp": "1762652580.2468212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-5", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4702888336281067 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206902586604485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3998229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833942819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json b/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json new file mode 100644 index 000000000..aa0870301 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-6/1762652580.247051", + "retrieved_timestamp": "1762652580.247051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48797413086166924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211453749255449 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40515625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871343085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json b/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json new file mode 100644 index 000000000..1c2bcd402 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-7/1762652580.2473001", + "retrieved_timestamp": "1762652580.247304", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bbb-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bbb-7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48280340607366234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211089947725771 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4038229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json b/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json new file mode 100644 index 000000000..f27ac4808 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-1/1762652580.2475939", + "retrieved_timestamp": "1762652580.247595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42842325030917966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5890155164168736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4441041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3449135638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json b/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json new file mode 100644 index 000000000..8867badf1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-10/1762652580.247846", + "retrieved_timestamp": "1762652580.2478468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46184568057199343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5856025427339699 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41985416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37076130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json b/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json new file mode 100644 index 000000000..aea58f212 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-11/1762652580.2481", + "retrieved_timestamp": "1762652580.2481012", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45754930251732073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5851155912628809 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3738364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json b/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json new file mode 100644 index 000000000..165d0a1a8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-12/1762652580.248367", + "retrieved_timestamp": "1762652580.248368", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-12", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-12" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47338617091539337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5802489392471556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37367021276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json b/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json new file mode 100644 index 000000000..2b65c2a3b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-13/1762652580.248588", + "retrieved_timestamp": "1762652580.248588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-13" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4697890486132351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5777886799254942 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41585416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37300531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json b/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json new file mode 100644 index 000000000..05607539a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-15/1762652580.248791", + "retrieved_timestamp": "1762652580.2487922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-15", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-15" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47453534399836883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5818643001829722 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4105208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37666223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json b/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json new file mode 100644 index 000000000..be97e7cb5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-16/1762652580.2489972", + "retrieved_timestamp": "1762652580.248998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-16", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4730614768813415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5783335636603978 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4158541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37757646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json b/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json new file mode 100644 index 000000000..1a41644f3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-17/787d8040-25c8-4893-b140-cf041260d767.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-17/1762652580.249204", + "retrieved_timestamp": "1762652580.2492049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-17" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4721871301480073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5776302177859685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41582291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37566489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json b/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json new file mode 100644 index 000000000..0ccf4a784 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-18/1762652580.249514", + "retrieved_timestamp": "1762652580.249515", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-18", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47246195649764844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5823837707078298 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37566489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json b/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json new file mode 100644 index 000000000..b85692f95 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-19/1762652580.249828", + "retrieved_timestamp": "1762652580.249829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-19", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-19" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45842364925065493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5765774285787187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3774933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json b/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json new file mode 100644 index 000000000..e2e90f5bd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-2/1762652580.250077", + "retrieved_timestamp": "1762652580.250078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45792386423578324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5937358907182445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41864583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695146276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json b/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json new file mode 100644 index 000000000..8d19b38f3 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-20/1762652580.2503", + "retrieved_timestamp": "1762652580.2503", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727367828472896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.574973333640619 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4105208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json b/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json new file mode 100644 index 000000000..b5e7c12b5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-21/1762652580.25052", + "retrieved_timestamp": "1762652580.2505212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-21", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-21" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47001400727846554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5738369241857685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37757646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json b/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json new file mode 100644 index 000000000..46623a261 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-22/1762652580.250869", + "retrieved_timestamp": "1762652580.25087", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-22", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-22" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45999725173650363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.579296884452635 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41715625000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json b/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json new file mode 100644 index 000000000..080421792 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-23/1762652580.2511601", + "retrieved_timestamp": "1762652580.251161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-23", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-23" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46576749690820357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.570027700842045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37957114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json b/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json new file mode 100644 index 000000000..cf5fba37d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-24/1762652580.251392", + "retrieved_timestamp": "1762652580.251392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-24", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4715377420799035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5716684749879075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38090093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json b/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json new file mode 100644 index 000000000..355db32fe --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-25/1762652580.251633", + "retrieved_timestamp": "1762652580.251633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-25", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47518473206647255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5705628020556314 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37824135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json b/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json new file mode 100644 index 000000000..9ab8a4a07 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-26/1762652580.251851", + "retrieved_timestamp": "1762652580.251852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-26", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-26" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4690897928607206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5734958656360526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3771609042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json b/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json new file mode 100644 index 000000000..5ef6ecb91 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-27/1762652580.2520802", + "retrieved_timestamp": "1762652580.252081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-27", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-27" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4818791916559174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.571405917910282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.409125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3799035904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json b/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json new file mode 100644 index 000000000..3050d6e3c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-28/1762652580.252297", + "retrieved_timestamp": "1762652580.2522979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-28", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-28" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4785070280189896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5702617832390487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.413125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json b/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json new file mode 100644 index 000000000..2b6817e50 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-29/1762652580.252519", + "retrieved_timestamp": "1762652580.2525198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-29", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-29" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46881496651107946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5670161357895335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4236979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38189827127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json b/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json new file mode 100644 index 000000000..94d8d7c62 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-3/1762652580.2527301", + "retrieved_timestamp": "1762652580.2527308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4663670172918966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5890722855221537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37017952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json b/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json new file mode 100644 index 000000000..3525e43b4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-30/1762652580.252943", + "retrieved_timestamp": "1762652580.2529438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-30", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-30" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46664184364153777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5705838505746653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3781582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json b/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json new file mode 100644 index 000000000..b4745b290 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-31/a637936e-646b-4c21-964a-61e253fd3705.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-31/1762652580.253162", + "retrieved_timestamp": "1762652580.253163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-31", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-31" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727367828472896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5665082303171874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3819813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json b/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json new file mode 100644 index 000000000..5bb64bb99 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-32/1762652580.253373", + "retrieved_timestamp": "1762652580.2533739", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-32", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4635943740386619 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5662056335064284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json b/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json new file mode 100644 index 000000000..7dcf2f445 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-33/1762652580.25359", + "retrieved_timestamp": "1762652580.253591", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-33", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-33" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4685401401614383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5652966799156172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38081781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json b/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json new file mode 100644 index 000000000..365e7bdc6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-34/1762652580.253809", + "retrieved_timestamp": "1762652580.25381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-34", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-34" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4623953332712758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5681235912530039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38040226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json b/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json new file mode 100644 index 000000000..f54e357ef --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-35/1762652580.2540212", + "retrieved_timestamp": "1762652580.254022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-35", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-35" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47213726246359655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5639648300586834 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41830208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3829787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json b/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json new file mode 100644 index 000000000..556b75506 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-36/1762652580.2542279", + "retrieved_timestamp": "1762652580.254229", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-36", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-36" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4665919759571271 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5664445599052024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4196354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json b/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json new file mode 100644 index 000000000..283c19ddd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-37/1762652580.2544441", + "retrieved_timestamp": "1762652580.254445", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-37", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-37" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48797413086166924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.562488460737535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3828125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json b/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json new file mode 100644 index 000000000..46a5c321b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-38/1762652580.2548852", + "retrieved_timestamp": "1762652580.2548869", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-38", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-38" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46179581288758276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5658176339168742 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3810671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json b/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json new file mode 100644 index 000000000..64945462b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-39/1762652580.2552152", + "retrieved_timestamp": "1762652580.2552161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-39", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-39" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45759917020173135 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5633012248625926 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38314494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json b/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json new file mode 100644 index 000000000..68408fe15 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-4/1762652580.2554429", + "retrieved_timestamp": "1762652580.255444", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-4", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672912317096415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5892000111391051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41728125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3705119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json b/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json new file mode 100644 index 000000000..efcca4c37 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-40/1762652580.2556531", + "retrieved_timestamp": "1762652580.2556539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-40", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-40" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45357761849669986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5633956317971519 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4236041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38347739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json b/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json new file mode 100644 index 000000000..4d0c9274f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-41/1762652580.2558541", + "retrieved_timestamp": "1762652580.2558541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-41", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-41" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4739856912990864 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.56138466485423 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41827083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json b/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json new file mode 100644 index 000000000..d9b5b0510 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-42/1762652580.25606", + "retrieved_timestamp": "1762652580.2560608", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-42", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-42" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4660423232578447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5645607204696422 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42100000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json b/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json new file mode 100644 index 000000000..7e6657ac7 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-43/1762652580.2562718", + "retrieved_timestamp": "1762652580.2562718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-43", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-43" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45999725173650363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5635240412618795 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3819813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json b/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json new file mode 100644 index 000000000..93baeba8e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-44/1762652580.2565289", + "retrieved_timestamp": "1762652580.2565298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-44", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-44" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4706135276621586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5642775941837409 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42487500000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833942819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json b/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json new file mode 100644 index 000000000..97f840046 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-46/1762652580.2567308", + "retrieved_timestamp": "1762652580.256732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-46", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-46" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727367828472896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5631697539272891 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3822307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json b/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json new file mode 100644 index 000000000..1f8b48b63 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-47/1762652580.256935", + "retrieved_timestamp": "1762652580.2569358", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-47", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-47" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46516797652451053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5545716016743777 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3854720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json b/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json new file mode 100644 index 000000000..75e6f76f2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-48/1762652580.257132", + "retrieved_timestamp": "1762652580.257133", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-48", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-48" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46881496651107946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5541308128775738 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4209375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json b/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json new file mode 100644 index 000000000..cd1b16546 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-49/1762652580.257362", + "retrieved_timestamp": "1762652580.257366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-49", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-49" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47246195649764844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5540285004706683 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41290625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38081781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json b/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json new file mode 100644 index 000000000..67cada033 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-5/1762652580.2577002", + "retrieved_timestamp": "1762652580.2577012", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-5", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46516797652451053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5881569099353959 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37017952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json b/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json new file mode 100644 index 000000000..da25aafae --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-50/1762652580.257925", + "retrieved_timestamp": "1762652580.257926", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-50", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-50" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47246195649764844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.555294802866646 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41687500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json b/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json new file mode 100644 index 000000000..6d089439f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-51/1762652580.2581341", + "retrieved_timestamp": "1762652580.258135", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-51", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-51" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4630447213393795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5557101784534039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41681250000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38314494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json b/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json new file mode 100644 index 000000000..38dbecaca --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-52/1762652580.258348", + "retrieved_timestamp": "1762652580.258349", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-52", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-52" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45362748618111054 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.544409095161705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41690625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38430851063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json b/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json new file mode 100644 index 000000000..67f420e21 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-53/1762652580.25855", + "retrieved_timestamp": "1762652580.2585511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-53", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-53" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4779573753197073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5494367702137035 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29865771812080544 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4196041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38580452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json b/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json new file mode 100644 index 000000000..795a34aea --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-54/1762652580.258788", + "retrieved_timestamp": "1762652580.258792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-54", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-54" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48405231452545916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5547738488653888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4155416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json b/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json new file mode 100644 index 000000000..d2c0d8478 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-55/1762652580.259115", + "retrieved_timestamp": "1762652580.259116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-55", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-55" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47093822169621047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5549641462109072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42220833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3846409574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json b/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json new file mode 100644 index 000000000..f24b442cb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-56/1762652580.2593641", + "retrieved_timestamp": "1762652580.259365", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-56", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-56" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45999725173650363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446903231355648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4116041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3843916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json b/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json new file mode 100644 index 000000000..7a9352ac7 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-57/1762652580.259624", + "retrieved_timestamp": "1762652580.259625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-57", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-57" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44051339335186196 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5424621834237494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42103124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3896276595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json b/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json new file mode 100644 index 000000000..1b4ed7877 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-58/1762652580.259867", + "retrieved_timestamp": "1762652580.259868", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-58", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-58" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4630447213393795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5446322106157867 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3896276595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json b/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json new file mode 100644 index 000000000..22679a582 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-59/1762652580.260088", + "retrieved_timestamp": "1762652580.2600892", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-59", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-59" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43414362779646887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5511531646170439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1540785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41700000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3838098404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json b/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json new file mode 100644 index 000000000..b3acf6dcb --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-6/1762652580.260308", + "retrieved_timestamp": "1762652580.260309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620706392372239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5890658635262072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41991666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36976396276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json b/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json new file mode 100644 index 000000000..2240aec4c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-60/1762652580.2605288", + "retrieved_timestamp": "1762652580.2605288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-60", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-60" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42070484093316846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5368509826419269 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3689328457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json b/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json new file mode 100644 index 000000000..b7d485d9c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-61/1762652580.260743", + "retrieved_timestamp": "1762652580.260743", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-61", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-61" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42467652495378927 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5271029876122725 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4355729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json b/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json new file mode 100644 index 000000000..fa9c30990 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-62/1762652580.260948", + "retrieved_timestamp": "1762652580.260949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-62", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-62" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41498446344587914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5379352222621877 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1623867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42890625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3719248670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json b/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json new file mode 100644 index 000000000..59deb5c66 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-63/1762652580.261157", + "retrieved_timestamp": "1762652580.261157", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-63", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-63" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43077146415954115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49171126396743653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3248005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json b/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json new file mode 100644 index 000000000..6705ea7f6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-64/1762652580.261374", + "retrieved_timestamp": "1762652580.261375", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-64", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-64" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41401038134372353 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359944334653838 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4355416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3692652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json b/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json new file mode 100644 index 000000000..5721cb645 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-7/1762652580.261788", + "retrieved_timestamp": "1762652580.261791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4623953332712758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5860594415302606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41191666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3715093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json b/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json new file mode 100644 index 000000000..47959dd08 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-8/1762652580.262149", + "retrieved_timestamp": "1762652580.262152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-8", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45967255770245175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5899505025903907 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4265208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37200797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json b/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json new file mode 100644 index 000000000..80a3dc661 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/bh-9/956d92e9-51fb-4770-8687-6003f9594345.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_bh-9/1762652580.262652", + "retrieved_timestamp": "1762652580.2626529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/bh-9", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/bh-9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508548429278758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5850048697918168 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4145833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3702626329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json b/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json new file mode 100644 index 000000000..56b878294 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_dp-6-8b/1762652580.263117", + "retrieved_timestamp": "1762652580.2631192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/dp-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/dp-6-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4805804155197099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5299697041031141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44338541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38971077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json b/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json new file mode 100644 index 000000000..f96126fb6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_dp-7-8b/1762652580.2634509", + "retrieved_timestamp": "1762652580.2634518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/dp-7-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/dp-7-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44983089314130953 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290850650389306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933676861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json b/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json new file mode 100644 index 000000000..8d60b81d5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_ek-6/1762652580.2637498", + "retrieved_timestamp": "1762652580.263751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/ek-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/ek-6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4642437621067656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219292795769993 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3861369680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json b/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json new file mode 100644 index 000000000..d1547609f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/ek-7/23127691-ff90-433f-97d2-322e1191d821.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_ek-7/1762652580.264135", + "retrieved_timestamp": "1762652580.2641358", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/ek-7", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/ek-7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47670846686791046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194098090521417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41706249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38871343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json b/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json new file mode 100644 index 000000000..13897e64e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-1-8b/1762652580.264415", + "retrieved_timestamp": "1762652580.264416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49826571275327247 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5140825686172996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45268749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39070811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json b/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json new file mode 100644 index 000000000..feb269597 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-2-8b/1762652580.264705", + "retrieved_timestamp": "1762652580.2647061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-2-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48237897667078905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294150378468933 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4500520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39619348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json b/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json new file mode 100644 index 000000000..180b12531 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-3-8b/1762652580.264997", + "retrieved_timestamp": "1762652580.264998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4803055891700687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274906581043712 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44208333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39544547872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json b/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json new file mode 100644 index 000000000..09b854811 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-4-8b/1762652580.265391", + "retrieved_timestamp": "1762652580.2653928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-4-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-4-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4797060687863757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5288622486396436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45141666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39561170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json b/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json new file mode 100644 index 000000000..d4d7ddc10 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-5-8b/1762652580.265783", + "retrieved_timestamp": "1762652580.265785", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-5-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-5-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5043606519590242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5313273519630752 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4460520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39486369680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json b/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json new file mode 100644 index 000000000..ca5975e41 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-6-8b/1762652580.2661529", + "retrieved_timestamp": "1762652580.266155", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-6-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48460196722474147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.524094753042471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44735416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3939494680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json b/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json new file mode 100644 index 000000000..6e4770307 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-7-8b/1762652580.2664478", + "retrieved_timestamp": "1762652580.266449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-7-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-7-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4462337708391512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5277022085059414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4315104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39361702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json b/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json new file mode 100644 index 000000000..d49e6fbb5 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-8-8b/1762652580.266931", + "retrieved_timestamp": "1762652580.266932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-8-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-8-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4739358236146758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5259311478463803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43544791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39403257978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json b/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json new file mode 100644 index 000000000..ec7913e9e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_f-9-8b/1762652580.267217", + "retrieved_timestamp": "1762652580.2672179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/f-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/f-9-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4601723427173233 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5291558412946383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44608333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3943650265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json b/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json new file mode 100644 index 000000000..c265b7c6a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_fct-14-8b/1762652580.2674618", + "retrieved_timestamp": "1762652580.267463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/fct-14-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/fct-14-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4128612082607481 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206018889288543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875498670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json b/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json new file mode 100644 index 000000000..93f909483 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_fct-9-8b/1762652580.267691", + "retrieved_timestamp": "1762652580.267692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/fct-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/fct-9-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4353925362482657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520510244410076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42906249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39320146276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json b/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json new file mode 100644 index 000000000..867735571 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-1-8b/1762652580.267912", + "retrieved_timestamp": "1762652580.2679129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/fr-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/fr-1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.421079402651631 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5142290494968609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36103723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json b/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json new file mode 100644 index 000000000..4b0629e10 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-10-8b/1762652580.268136", + "retrieved_timestamp": "1762652580.268136", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/fr-10-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/fr-10-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44018869931781013 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206624978702634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4118541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3863031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json b/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json new file mode 100644 index 000000000..36cd34ef2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_fr-3-8b/1762652580.268359", + "retrieved_timestamp": "1762652580.26836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/fr-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/fr-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325700253106203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255174690526301 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41982291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3863031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json new file mode 100644 index 000000000..b16c7d7ba --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-8B/1762652580.268576", + "retrieved_timestamp": "1762652580.268577", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/gamma-Kosmos-EVAA-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/gamma-Kosmos-EVAA-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42500121898784116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252624326543692 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44115624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37757646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json new file mode 100644 index 000000000..c9fbde590 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v2-8B/1762652580.268805", + "retrieved_timestamp": "1762652580.268806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232525255211727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262464083930688 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3755817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json new file mode 100644 index 000000000..8162acbc7 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v3-8B/1762652580.269119", + "retrieved_timestamp": "1762652580.26912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43326928106313467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527793553969925 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3897938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json b/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json new file mode 100644 index 000000000..d8f91ae50 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_knf-2-8b/1762652580.269415", + "retrieved_timestamp": "1762652580.2694159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/knf-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/knf-2-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42500121898784116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206718655559387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3874667553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json b/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json new file mode 100644 index 000000000..618cb4a56 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-2-8b/1762652580.2696629", + "retrieved_timestamp": "1762652580.269664", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/knfp-2-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/knfp-2-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327120928026525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5304878011708133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14274924471299094 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37258976063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json b/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json new file mode 100644 index 000000000..253240260 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-3-8b/1762652580.2700531", + "retrieved_timestamp": "1762652580.2700539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/knfp-3-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/knfp-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49456885508229276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199790073136731 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41712499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3881316489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json b/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json new file mode 100644 index 000000000..96bb72cd2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-1-8b/1762652580.2702851", + "retrieved_timestamp": "1762652580.270286", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-1-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4642936297911763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5209048705325947 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3892121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json b/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json new file mode 100644 index 000000000..93334c07f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-11-8b/1762652580.270522", + "retrieved_timestamp": "1762652580.270522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-11-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-11-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4757343847657549 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5189389675805397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3878823138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json b/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json new file mode 100644 index 000000000..58bb003d4 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-4-8b/1762652580.270735", + "retrieved_timestamp": "1762652580.270736", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-4-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-4-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4769832932175517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5216059333020012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868849734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json b/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json new file mode 100644 index 000000000..d36f01936 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-5-8b/1762652580.270952", + "retrieved_timestamp": "1762652580.270953", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-5-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-5-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47208739477918593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211438914491455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4223958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3892121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json b/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json new file mode 100644 index 000000000..19241db76 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-6-8b/1762652580.2711701", + "retrieved_timestamp": "1762652580.2711701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-6-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-6-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49439376410147295 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5230977287748603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3857214095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json b/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json new file mode 100644 index 000000000..76709f3fc --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-8-8b/1762652580.271383", + "retrieved_timestamp": "1762652580.271384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-8-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-8-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49097173278013445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5238910223750602 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42112499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3888796542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json b/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json new file mode 100644 index 000000000..9e3959758 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-9-8b/1762652580.27159", + "retrieved_timestamp": "1762652580.27159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/kstc-9-8b", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/kstc-9-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4860758343417687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5238366551736342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13595166163141995 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38721742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json b/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json new file mode 100644 index 000000000..4aca0bb19 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-10/1762652580.271806", + "retrieved_timestamp": "1762652580.271807", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359920566319587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5096469529197213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3920104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3663563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json b/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json new file mode 100644 index 000000000..683aa6b30 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-11/1762652580.272018", + "retrieved_timestamp": "1762652580.272018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.372519359743259 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4890236865115587 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3919479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json b/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json new file mode 100644 index 000000000..895ad086a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-13/1762652580.272234", + "retrieved_timestamp": "1762652580.272237", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-13" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4378404854674486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097334543819346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38140625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35804521276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json b/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json new file mode 100644 index 000000000..335e9416b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-14/59703023-61e1-4df0-8542-703d5a318756.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-14/1762652580.27245", + "retrieved_timestamp": "1762652580.2724512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-14", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4106880853912065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088505978489455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3960416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3626994680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json b/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json new file mode 100644 index 000000000..f88fc956c --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-17/1762652580.272654", + "retrieved_timestamp": "1762652580.272655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-17" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42167892303532406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070562055653275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3761041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3618683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json b/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json new file mode 100644 index 000000000..6463ec1e0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-2/1762652580.2728698", + "retrieved_timestamp": "1762652580.272871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-2", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40159554426698935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5008068127974601 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3958854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35064827127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json b/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json new file mode 100644 index 000000000..68fd7fdbd --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-20/1762652580.273083", + "retrieved_timestamp": "1762652580.273084", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4393143525844759 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5061273966566772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229606 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39334375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36652260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json b/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json new file mode 100644 index 000000000..d6bd7cf71 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-22/1762652580.2733881", + "retrieved_timestamp": "1762652580.273391", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-22", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-22" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4321201079801593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5081790871805086 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38934375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3650265957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json b/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json new file mode 100644 index 000000000..4b7433c0d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-23/1762652580.27371", + "retrieved_timestamp": "1762652580.2737112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-23", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-23" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44780737332499987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5131603005034272 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40924999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3725066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json b/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json new file mode 100644 index 000000000..98d0eb7f2 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-25/1762652580.27394", + "retrieved_timestamp": "1762652580.273941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-25", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4500303638789523 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5094887898349904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json b/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json new file mode 100644 index 000000000..6f9e95465 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-29/1762652580.274164", + "retrieved_timestamp": "1762652580.274165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-29", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-29" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4430610779398662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5096472519745161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229606 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366938164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json b/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json new file mode 100644 index 000000000..553acecb1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-32/1762652580.274382", + "retrieved_timestamp": "1762652580.274383", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-32", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45155409868039026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5167277162337642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4039166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765791223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json b/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json new file mode 100644 index 000000000..b6c2d20a9 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-33/1762652580.274691", + "retrieved_timestamp": "1762652580.274692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-33", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-33" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457339858242796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5081308429202344 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38667708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json b/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json new file mode 100644 index 000000000..ae2ba09f1 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-34/1762652580.2749598", + "retrieved_timestamp": "1762652580.274961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-34", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-34" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350678422142138 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5077400809148992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37200797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json b/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json new file mode 100644 index 000000000..190ac240d --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-35/1762652580.275198", + "retrieved_timestamp": "1762652580.2752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-35", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-35" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42417673993891764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103079759559944 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39464583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3676030585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json b/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json new file mode 100644 index 000000000..59d405570 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-36/1762652580.275441", + "retrieved_timestamp": "1762652580.275442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-36", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-36" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518289250300314 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5087352369131289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json b/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json new file mode 100644 index 000000000..32add8ede --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-37/1762652580.2757561", + "retrieved_timestamp": "1762652580.275757", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-37", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-37" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4533526598314694 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5099854293096197 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39464583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695146276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json b/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json new file mode 100644 index 000000000..614d3bd2a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-6/1762652580.276035", + "retrieved_timestamp": "1762652580.276036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41166216749336204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5098719666858446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4066458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3611203457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json b/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json new file mode 100644 index 000000000..fa1afeab8 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_slu-mix-1/1762652580.276264", + "retrieved_timestamp": "1762652580.276264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/slu-mix-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/slu-mix-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45689991444921696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5240269525191525 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39303523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json b/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json new file mode 100644 index 000000000..c41d83619 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-1/1762652580.276484", + "retrieved_timestamp": "1762652580.2764852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/sof-1", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/sof-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4313709845432342 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5009822733212669 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40819791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.367436835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json b/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json new file mode 100644 index 000000000..177ef6f39 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-10/03761253-711d-428d-a3bd-89974a50b490.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-10/1762652580.276895", + "retrieved_timestamp": "1762652580.276897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/sof-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/sof-10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46484328249045864 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5197177291754291 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40906250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38738364361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json b/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json new file mode 100644 index 000000000..5c4179089 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-3/1762652580.277219", + "retrieved_timestamp": "1762652580.27722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/sof-3", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/sof-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46369410940748323 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206072122413828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41312499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json b/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json new file mode 100644 index 000000000..bf6ee8905 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_sof-6/1762652580.277473", + "retrieved_timestamp": "1762652580.2774742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/sof-6", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/sof-6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4353925362482657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5209098090521417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41706250000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3843916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json b/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json new file mode 100644 index 000000000..c6b003257 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-10/1762652580.2777631", + "retrieved_timestamp": "1762652580.277764", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-10", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4578241288669619 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316217442466934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42509375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39361702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json b/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json new file mode 100644 index 000000000..a454b35a6 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-11/98f97092-7c95-46dd-94c7-4030f153d197.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-11/1762652580.2779882", + "retrieved_timestamp": "1762652580.2779891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-11", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45412727119598223 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5350048053167004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3939494680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json b/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json new file mode 100644 index 000000000..21a126d67 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-12/1762652580.278201", + "retrieved_timestamp": "1762652580.278202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-12", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-12" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4368165356808823 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347063686599355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42503124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935339095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json b/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json new file mode 100644 index 000000000..1ee8baa89 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-13/1762652580.278408", + "retrieved_timestamp": "1762652580.278409", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-13", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-13" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45809895521660304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531808681066841 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935339095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json b/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json new file mode 100644 index 000000000..49772b91f --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-14/1762652580.2787268", + "retrieved_timestamp": "1762652580.278728", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-14", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4443853420036614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5322932549151301 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3929521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json b/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json new file mode 100644 index 000000000..fa93524e0 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-15/1762652580.278964", + "retrieved_timestamp": "1762652580.278965", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-15", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-15" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4364918416468304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.53278841091336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4264270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3929521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json b/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json new file mode 100644 index 000000000..161f9729b --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-16/1762652580.279189", + "retrieved_timestamp": "1762652580.27919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-16", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599473840520929 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330160713144172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4224583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39303523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json b/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json new file mode 100644 index 000000000..31574db5e --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-17/1762652580.279401", + "retrieved_timestamp": "1762652580.279402", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-17", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-17" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42674991245450955 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5329373895863633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11027190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.429 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39286901595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json b/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json new file mode 100644 index 000000000..fe928b74a --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-18/1762652580.2796118", + "retrieved_timestamp": "1762652580.279613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-18", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43916474953124374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317453097096507 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39303523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json b/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json new file mode 100644 index 000000000..a067dba61 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-19/1762652580.279826", + "retrieved_timestamp": "1762652580.2798269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-19", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-19" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44008896394898867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5319373895863634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4263958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39286901595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json b/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json new file mode 100644 index 000000000..475ce5b67 --- /dev/null +++ b/data/hfopenllm_v2/jaspionjader/test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jaspionjader_test-20/1762652580.2800388", + "retrieved_timestamp": "1762652580.28004", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jaspionjader/test-20", + "developer": "jaspionjader", + "inference_platform": "unknown", + "id": "jaspionjader/test-20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45292823042859615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327388877137041 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39195478723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json b/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json new file mode 100644 index 000000000..b37b7a553 --- /dev/null +++ b/data/hfopenllm_v2/jebcarter/psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebcarter_psyonic-cetacean-20B/1762652580.2807941", + "retrieved_timestamp": "1762652580.2807949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebcarter/psyonic-cetacean-20B", + "developer": "jebcarter", + "inference_platform": "unknown", + "id": "jebcarter/psyonic-cetacean-20B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25436619281284767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4907386156835858 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46611458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28856382978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 19.994 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json b/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json new file mode 100644 index 000000000..b39af1407 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_Llama-3-Nanda-10B-Chat/1762652580.28106", + "retrieved_timestamp": "1762652580.2810612", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/Llama-3-Nanda-10B-Chat", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/Llama-3-Nanda-10B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2952831819572069 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4958605204321644 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4356041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.985 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json b/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json new file mode 100644 index 000000000..b48007a27 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_Llama-3.1-8B-Instruct/1762652580.281322", + "retrieved_timestamp": "1762652580.281322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/Llama-3.1-8B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058345190760515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088388495224864 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3997916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3777426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json new file mode 100644 index 000000000..7ec5ede74 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Base/1762652580.2815292", + "retrieved_timestamp": "1762652580.2815301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22848818911599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3923566745600671 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42490625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25033244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json new file mode 100644 index 000000000..7ea59c568 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/1762652580.2817988", + "retrieved_timestamp": "1762652580.2818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3345257250761313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040596055988545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41529166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25947473404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json b/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json new file mode 100644 index 000000000..05ef5913a --- /dev/null +++ b/data/hfopenllm_v2/jebish7/Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-Mini-4B-Instruct/1762652580.282024", + "retrieved_timestamp": "1762652580.282024", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/Nemotron-Mini-4B-Instruct", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/Nemotron-Mini-4B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37092026932982264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244475437312765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47271875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27825797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.191 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json b/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json new file mode 100644 index 000000000..9065b9b79 --- /dev/null +++ b/data/hfopenllm_v2/jebish7/aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jebish7_aya-expanse-8b/1762652580.282242", + "retrieved_timestamp": "1762652580.282243", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jebish7/aya-expanse-8b", + "developer": "jebish7", + "inference_platform": "unknown", + "id": "jebish7/aya-expanse-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37911408396388246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.496904421264497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31025598404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "CohereForCausalLM", + "params_billions": 8.028 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json b/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json new file mode 100644 index 000000000..d8ef44b64 --- /dev/null +++ b/data/hfopenllm_v2/jeonsworld/CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jeonsworld_CarbonVillain-en-10.7B-v4/1762652580.2876348", + "retrieved_timestamp": "1762652580.287636", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jeonsworld/CarbonVillain-en-10.7B-v4", + "developer": "jeonsworld", + "inference_platform": "unknown", + "id": "jeonsworld/CarbonVillain-en-10.7B-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45792386423578324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516795955873779 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3965416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31416223404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json b/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json new file mode 100644 index 000000000..fe1551da4 --- /dev/null +++ b/data/hfopenllm_v2/jieliu/Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jieliu_Storm-7B/1762652580.288308", + "retrieved_timestamp": "1762652580.288309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jieliu/Storm-7B", + "developer": "jieliu", + "inference_platform": "unknown", + "id": "jieliu/Storm-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3424192254329623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5187285371254579 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119182180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json b/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json new file mode 100644 index 000000000..f8565daa0 --- /dev/null +++ b/data/hfopenllm_v2/jiviai/medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jiviai_medX_v2/1762652580.288615", + "retrieved_timestamp": "1762652580.288616", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jiviai/medX_v2", + "developer": "jiviai", + "inference_platform": "unknown", + "id": "jiviai/medX_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37431792089433813 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508721125093523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34984375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34283577127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json b/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json new file mode 100644 index 000000000..32796fcf0 --- /dev/null +++ b/data/hfopenllm_v2/jlzhou/Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/1762652580.288917", + "retrieved_timestamp": "1762652580.288918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", + "developer": "jlzhou", + "inference_platform": "unknown", + "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35575827692744144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773774601029352 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39809374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3198969414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json new file mode 100644 index 000000000..f563fa644 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/1762652580.289233", + "retrieved_timestamp": "1762652580.289234", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42712447417297217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5035519809362171 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4637604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37391954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json new file mode 100644 index 000000000..a0ff054ef --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1762652580.289527", + "retrieved_timestamp": "1762652580.2895281", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42532591302189304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5018845446835877 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41502083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37242353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json new file mode 100644 index 000000000..25216d2af --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/1762652580.2897432", + "retrieved_timestamp": "1762652580.289744", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33774828565982706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4917135045463188 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5017708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3533078457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json new file mode 100644 index 000000000..fab251ce5 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/1762652580.289967", + "retrieved_timestamp": "1762652580.289967", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4273993005226133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5125777877188348 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42264583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37391954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json new file mode 100644 index 000000000..5e2032caa --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/1762652580.2902021", + "retrieved_timestamp": "1762652580.2902029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32036219453272874 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48835763921755193 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33444148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json new file mode 100644 index 000000000..f38eb28a7 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/1762652580.290431", + "retrieved_timestamp": "1762652580.290432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43963904661852776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5140041302485145 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43979166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36959773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json new file mode 100644 index 000000000..778b6ad2a --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/1762652580.290661", + "retrieved_timestamp": "1762652580.2906618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2814443454478561 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4854325756272537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5163125000000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3295378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json new file mode 100644 index 000000000..a1941a056 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/1762652580.2908769", + "retrieved_timestamp": "1762652580.290878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4302218114602588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5157097379648965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43315624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36627327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json new file mode 100644 index 000000000..0f91aeaf1 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/1762652580.2910998", + "retrieved_timestamp": "1762652580.291101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2789963962286732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48611535229340735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5150104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3304521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json new file mode 100644 index 000000000..c700c8a9d --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1762652580.291321", + "retrieved_timestamp": "1762652580.291322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4222784434190171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5153764046315631 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4384270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3650265957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json new file mode 100644 index 000000000..a14a1fe4a --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/1762652580.291548", + "retrieved_timestamp": "1762652580.291548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4358923212631374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5040935986635269 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45315625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3762466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json new file mode 100644 index 000000000..2f62582bf --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/1762652580.291779", + "retrieved_timestamp": "1762652580.29178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4201551882338861 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.501124270710985 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41502083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699301861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json new file mode 100644 index 000000000..88d236121 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/1762652580.292005", + "retrieved_timestamp": "1762652580.2920058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35178659290682057 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49985217584312186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48710416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3611203457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json new file mode 100644 index 000000000..1e2fa1ded --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/1762652580.2922251", + "retrieved_timestamp": "1762652580.292226", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42038014689911657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5107301269172088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42785416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37101063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json new file mode 100644 index 000000000..154bdbf07 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/1762652580.292447", + "retrieved_timestamp": "1762652580.292447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34541682735142754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4983827321097329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49113541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3531416223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json new file mode 100644 index 000000000..6cfb4e725 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/1762652580.292675", + "retrieved_timestamp": "1762652580.2926762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40916435058976847 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513665952913411 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43569791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366938164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json new file mode 100644 index 000000000..420903c9b --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/1762652580.292904", + "retrieved_timestamp": "1762652580.2929049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29038728351884113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4967337534367295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4990729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34898603723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json new file mode 100644 index 000000000..2a22ca815 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/1762652580.2931998", + "retrieved_timestamp": "1762652580.293205", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41988036188424493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5146905664948336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43576041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json new file mode 100644 index 000000000..653884b65 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/1762652580.293625", + "retrieved_timestamp": "1762652580.293626", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29131149793658606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49182964384768835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4976770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34541223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json new file mode 100644 index 000000000..48b0a64ba --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/1762652580.293948", + "retrieved_timestamp": "1762652580.293949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41623337189767595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5138610942606995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43172916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json new file mode 100644 index 000000000..6b0fa74ce --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_linear/1762652580.294196", + "retrieved_timestamp": "1762652580.294197", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_dare_linear", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_dare_linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21454961723781787 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282807940700452 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49792708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24143949468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json new file mode 100644 index 000000000..4f699a3aa --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/1762652580.2944481", + "retrieved_timestamp": "1762652580.2944489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18907055501624578 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41187360174735804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46580208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22647938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json new file mode 100644 index 000000000..fa5bbb293 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/1762652580.294691", + "retrieved_timestamp": "1762652580.294692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21132705665412216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4558569854124363 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30402260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json new file mode 100644 index 000000000..ca2a9ca86 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/1762652580.2949278", + "retrieved_timestamp": "1762652580.2949288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20338368861288048 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4722858888388635 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3148271276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json new file mode 100644 index 000000000..dd1b2c701 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/1762652580.29516", + "retrieved_timestamp": "1762652580.295161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21607335203925582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46639610671811504 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5230416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3143284574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json new file mode 100644 index 000000000..52c970e91 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_linear/1762652580.295396", + "retrieved_timestamp": "1762652580.295396", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_linear", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4308213318439518 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5031496839210309 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40971874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37117686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json new file mode 100644 index 000000000..93af6526f --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/1762652580.295634", + "retrieved_timestamp": "1762652580.295635", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41161229980895137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021445196013956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36003989361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json new file mode 100644 index 000000000..9c2ab5856 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/1762652580.29586", + "retrieved_timestamp": "1762652580.295861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3626278274977061 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49061122520005807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40248958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33211436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json new file mode 100644 index 000000000..cb1cde62a --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/1762652580.2960892", + "retrieved_timestamp": "1762652580.2960901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37966373666316483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47931248948849836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3879791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31748670212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json new file mode 100644 index 000000000..b8f36b265 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/1762652580.296313", + "retrieved_timestamp": "1762652580.296314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681232463197649 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4738186124296502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3152426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json new file mode 100644 index 000000000..f43ca3ab2 --- /dev/null +++ b/data/hfopenllm_v2/johnsutor/Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/1762652580.296535", + "retrieved_timestamp": "1762652580.296536", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", + "developer": "johnsutor", + "inference_platform": "unknown", + "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3858085435533274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47354321136013144 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181515957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json new file mode 100644 index 000000000..c4f4403ea --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-4k-DPO/1762652580.296761", + "retrieved_timestamp": "1762652580.2967622", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4688648341954902 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6299582409761587 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44388541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4763962765957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json new file mode 100644 index 000000000..7684266ad --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/1762652580.297051", + "retrieved_timestamp": "1762652580.297052", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6852107962428579 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6438408959901142 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20921450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46966422872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json new file mode 100644 index 000000000..b38e20ba3 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/1762652580.2973812", + "retrieved_timestamp": "1762652580.297384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.703995398874985 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6846125547592651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5619335347432024 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42339583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374002659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json new file mode 100644 index 000000000..7d0823b7a --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/1762652580.2977622", + "retrieved_timestamp": "1762652580.297763", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10334024831890495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.669567432054888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2756797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44673958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123836436170213 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json new file mode 100644 index 000000000..64d3eafb0 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/1762652580.298285", + "retrieved_timestamp": "1762652580.2982872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07421419611076388 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6736278064166185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.479607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5299202127659575 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json new file mode 100644 index 000000000..3795ac4b3 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/1762652580.298579", + "retrieved_timestamp": "1762652580.29858", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7037205725253439 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6548026688308357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206948640483384 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47681250000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374002659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json new file mode 100644 index 000000000..6982b36b5 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0/1762652580.2980192", + "retrieved_timestamp": "1762652580.2980192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0885273297073986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6769929749559443 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301695478723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json new file mode 100644 index 000000000..0cc5f3cf4 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/1762652580.298837", + "retrieved_timestamp": "1762652580.298838", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7240787776433197 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6475822300543483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950151057401813 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369015957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json new file mode 100644 index 000000000..c57b88ce3 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/1762652580.2990808", + "retrieved_timestamp": "1762652580.299082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7322969720342026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.646878884179919 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4108761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47811458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5337433510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json new file mode 100644 index 000000000..6245567dd --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/1762652580.299312", + "retrieved_timestamp": "1762652580.299314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5622625744136669 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5539982344792619 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44534375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988530585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json new file mode 100644 index 000000000..ab93a7a1b --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/1762652580.29967", + "retrieved_timestamp": "1762652580.299671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3737184005106451 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5471398082537478 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4754791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3937001329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json new file mode 100644 index 000000000..89805e5f3 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/1762652580.300061", + "retrieved_timestamp": "1762652580.300063", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5455014915978493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5487182027245813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41542708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3877160904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json b/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json new file mode 100644 index 000000000..b3b67bd77 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/1762652580.300392", + "retrieved_timestamp": "1762652580.3003929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30475028479988653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38346961466103785 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1809341755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json new file mode 100644 index 000000000..de205c889 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/1762652580.3010209", + "retrieved_timestamp": "1762652580.301022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3044754584502453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.381900181819828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38178124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1763630319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json new file mode 100644 index 000000000..a45821819 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1/1762652580.300676", + "retrieved_timestamp": "1762652580.300677", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31209413245743517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37810118011411814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40159374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18375997340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json new file mode 100644 index 000000000..af6df202e --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/1762652580.3014882", + "retrieved_timestamp": "1762652580.3014889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32335979645119395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3802022135816421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38438541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1870844414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json new file mode 100644 index 000000000..8e1368179 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/1762652580.301858", + "retrieved_timestamp": "1762652580.3018591", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30142798884736943 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38078615414710804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37502083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18617021276595744 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json b/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json new file mode 100644 index 000000000..014694a68 --- /dev/null +++ b/data/hfopenllm_v2/jpacifico/Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-Boosted-7B-Instruct/1762652580.302166", + "retrieved_timestamp": "1762652580.3021681", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jpacifico/Lucie-Boosted-7B-Instruct", + "developer": "jpacifico", + "inference_platform": "unknown", + "id": "jpacifico/Lucie-Boosted-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25661467129438775 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34654827210803724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.369875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1629820478723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.707 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json b/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json new file mode 100644 index 000000000..3542b6181 --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jsfs11_L3-8B-Stheno-slerp/1762652580.302513", + "retrieved_timestamp": "1762652580.302515", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jsfs11/L3-8B-Stheno-slerp", + "developer": "jsfs11", + "inference_platform": "unknown", + "id": "jsfs11/L3-8B-Stheno-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6751940407008958 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5325675903618755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3725416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36494348404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json new file mode 100644 index 000000000..112674ce2 --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v4/1762652580.302909", + "retrieved_timestamp": "1762652580.3029099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jsfs11/MixtureofMerges-MoE-4x7b-v4", + "developer": "jsfs11", + "inference_platform": "unknown", + "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40299405577201824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169007103786006 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43855208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30319148936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json new file mode 100644 index 000000000..fcbd3be3c --- /dev/null +++ b/data/hfopenllm_v2/jsfs11/MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v5/1762652580.30316", + "retrieved_timestamp": "1762652580.30316", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jsfs11/MixtureofMerges-MoE-4x7b-v5", + "developer": "jsfs11", + "inference_platform": "unknown", + "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41993022956865567 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5198481257083689 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4304895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3097573138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json b/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json new file mode 100644 index 000000000..0b29fc2c4 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-7b/1762652580.303385", + "retrieved_timestamp": "1762652580.3033862", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kaist-ai/janus-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "id": "kaist-ai/janus-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37751499355044615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4693667591541633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4401041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28740026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json b/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json new file mode 100644 index 000000000..21b29e6d8 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-dpo-7b/1762652580.303661", + "retrieved_timestamp": "1762652580.303662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kaist-ai/janus-dpo-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "id": "kaist-ai/janus-dpo-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4002712802031942 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4772581104894978 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43873958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2976230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json b/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json new file mode 100644 index 000000000..a9c9a4881 --- /dev/null +++ b/data/hfopenllm_v2/kaist-ai/janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kaist-ai_janus-rm-7b/1762652580.303882", + "retrieved_timestamp": "1762652580.303883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kaist-ai/janus-rm-7b", + "developer": "kaist-ai", + "inference_platform": "unknown", + "id": "kaist-ai/janus-rm-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.177804891022487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3056467446788138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38829166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LLMForSequenceRegression", + "params_billions": 7.111 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json new file mode 100644 index 000000000..46c83b124 --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.304877", + "retrieved_timestamp": "1762652580.3048792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kavonalds/BunderMaxx-0710", + "developer": "kavonalds", + "inference_platform": "unknown", + "id": "kavonalds/BunderMaxx-0710" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27007894608527594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.556586279503196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1449468085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json new file mode 100644 index 000000000..82ac95adc --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.3044312", + "retrieved_timestamp": "1762652580.3044322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kavonalds/BunderMaxx-0710", + "developer": "kavonalds", + "inference_platform": "unknown", + "id": "kavonalds/BunderMaxx-0710" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32825569488955975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6650758850169982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3393333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13139960106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json b/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json new file mode 100644 index 000000000..e0023f78b --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-1010/1762652580.305197", + "retrieved_timestamp": "1762652580.3051982", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kavonalds/BunderMaxx-1010", + "developer": "kavonalds", + "inference_platform": "unknown", + "id": "kavonalds/BunderMaxx-1010" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2980558252104416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7019840419971701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3484479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12242353723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json b/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json new file mode 100644 index 000000000..0f6bfe2fc --- /dev/null +++ b/data/hfopenllm_v2/kavonalds/Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kavonalds_Lancer-1-1b-Instruct/1762652580.305463", + "retrieved_timestamp": "1762652580.305465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kavonalds/Lancer-1-1b-Instruct", + "developer": "kavonalds", + "inference_platform": "unknown", + "id": "kavonalds/Lancer-1-1b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5545940327220664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32532742727549835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3144375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1568317819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json b/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json new file mode 100644 index 000000000..2ed43dd17 --- /dev/null +++ b/data/hfopenllm_v2/kekmodel/StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kekmodel_StopCarbon-10.7B-v5/1762652580.306321", + "retrieved_timestamp": "1762652580.3063219", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kekmodel/StopCarbon-10.7B-v5", + "developer": "kekmodel", + "inference_platform": "unknown", + "id": "kekmodel/StopCarbon-10.7B-v5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47283651821611106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5177716413471513 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4019375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156582446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json b/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json new file mode 100644 index 000000000..b742e29b3 --- /dev/null +++ b/data/hfopenllm_v2/khoantap/cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_cheap-moe-merge/1762652580.3070369", + "retrieved_timestamp": "1762652580.307038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/cheap-moe-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "id": "khoantap/cheap-moe-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4557008736818309 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.513116897226939 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338597074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.305 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json b/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json new file mode 100644 index 000000000..205b3a59c --- /dev/null +++ b/data/hfopenllm_v2/khoantap/moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_moe-out-merge/1762652580.309191", + "retrieved_timestamp": "1762652580.309192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/moe-out-merge", + "developer": "khoantap", + "inference_platform": "unknown", + "id": "khoantap/moe-out-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4504802812094133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515116897226939 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40630208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347739361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2MoeForCausalLM", + "params_billions": 19.305 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json b/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json new file mode 100644 index 000000000..f80fc8731 --- /dev/null +++ b/data/hfopenllm_v2/kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/1762652580.309702", + "retrieved_timestamp": "1762652580.3097029", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", + "developer": "kms7530", + "inference_platform": "unknown", + "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5455014915978493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42890394469736065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38206249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2798371010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json b/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json new file mode 100644 index 000000000..b800cd437 --- /dev/null +++ b/data/hfopenllm_v2/kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/1762652580.309973", + "retrieved_timestamp": "1762652580.309974", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", + "developer": "kms7530", + "inference_platform": "unknown", + "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4863251727638222 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49871846432893613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39828125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3480718085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 4.132 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json new file mode 100644 index 000000000..7feb30583 --- /dev/null +++ b/data/hfopenllm_v2/kno10/ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.5/1762652580.310679", + "retrieved_timestamp": "1762652580.3106802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kno10/ende-chat-0.0.5", + "developer": "kno10", + "inference_platform": "unknown", + "id": "kno10/ende-chat-0.0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3404455733010634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3604365707523862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17902260638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.891 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json b/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json new file mode 100644 index 000000000..a575c8d71 --- /dev/null +++ b/data/hfopenllm_v2/kno10/ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.7/1762652580.310943", + "retrieved_timestamp": "1762652580.310944", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kno10/ende-chat-0.0.7", + "developer": "kno10", + "inference_platform": "unknown", + "id": "kno10/ende-chat-0.0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.440063476021401 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37918745577624335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.386125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19664228723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.891 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json b/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json new file mode 100644 index 000000000..6d75b66d4 --- /dev/null +++ b/data/hfopenllm_v2/kyutai/helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kyutai_helium-1-preview-2b/1762652580.3111548", + "retrieved_timestamp": "1762652580.3111548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kyutai/helium-1-preview-2b", + "developer": "kyutai", + "inference_platform": "unknown", + "id": "kyutai/helium-1-preview-2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26136096667952147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3638164815956466 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18725066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "HeliumForCausalLM", + "params_billions": 2.173 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json b/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json new file mode 100644 index 000000000..273a2c47d --- /dev/null +++ b/data/hfopenllm_v2/kz919/QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kz919_QwQ-0.5B-Distilled-SFT/1762652580.311408", + "retrieved_timestamp": "1762652580.311409", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kz919/QwQ-0.5B-Distilled-SFT", + "developer": "kz919", + "inference_platform": "unknown", + "id": "kz919/QwQ-0.5B-Distilled-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3076725311063534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3256291569645335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3408541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15874335106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json b/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json new file mode 100644 index 000000000..82ea871a4 --- /dev/null +++ b/data/hfopenllm_v2/ladydaina/ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ladydaina_ECE-FDF/1762652580.311657", + "retrieved_timestamp": "1762652580.311657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ladydaina/ECE-FDF", + "developer": "ladydaina", + "inference_platform": "unknown", + "id": "ladydaina/ECE-FDF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3728440537773109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5150177593278346 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45039583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30069813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json new file mode 100644 index 000000000..9082ddb90 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/1762652580.312166", + "retrieved_timestamp": "1762652580.312166", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21377500587330506 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32694393820046386 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15334109042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json new file mode 100644 index 000000000..b6ed63cef --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/1762652580.312417", + "retrieved_timestamp": "1762652580.312417", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15639724819035714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2894308596288922 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37892708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11685505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json new file mode 100644 index 000000000..39c96b6ac --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/1762652580.31263", + "retrieved_timestamp": "1762652580.31263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1437075847639818 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3031946898842932 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11211768617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json new file mode 100644 index 000000000..efc826d73 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/1762652580.31284", + "retrieved_timestamp": "1762652580.312841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.325008754549041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42245501886651654 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42128125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2931349734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json new file mode 100644 index 000000000..16db022df --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/1762652580.3130481", + "retrieved_timestamp": "1762652580.313049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33235260220658963 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4170742409015322 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4306145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.289311835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json new file mode 100644 index 000000000..847d5c989 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/1762652580.31332", + "retrieved_timestamp": "1762652580.3133209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3264072660540699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46293726502592586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48639583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32139295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json new file mode 100644 index 000000000..2f31882a5 --- /dev/null +++ b/data/hfopenllm_v2/lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/1762652580.3135412", + "retrieved_timestamp": "1762652580.3135412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", + "developer": "lalainy", + "inference_platform": "unknown", + "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3248835312526319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46293726502592586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48639583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32139295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json b/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json new file mode 100644 index 000000000..840af2dae --- /dev/null +++ b/data/hfopenllm_v2/langgptai/qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/langgptai_qwen1.5-7b-chat-sa-v0.1/1762652580.314067", + "retrieved_timestamp": "1762652580.314068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "langgptai/qwen1.5-7b-chat-sa-v0.1", + "developer": "langgptai", + "inference_platform": "unknown", + "id": "langgptai/qwen1.5-7b-chat-sa-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42677429221133256 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325267992878656 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3551458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29928523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 15.443 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json b/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json new file mode 100644 index 000000000..2eb6dd3f5 --- /dev/null +++ b/data/hfopenllm_v2/lars1234/Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lars1234_Mistral-Small-24B-Instruct-2501-writer/1762652580.314311", + "retrieved_timestamp": "1762652580.314312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lars1234/Mistral-Small-24B-Instruct-2501-writer", + "developer": "lars1234", + "inference_platform": "unknown", + "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6565346613651777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6733164099871131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3557401812688822 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46453125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5447972074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json b/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json new file mode 100644 index 000000000..f7e205262 --- /dev/null +++ b/data/hfopenllm_v2/leafspark/Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/leafspark_Llama-3.1-8B-MultiReflection-Instruct/1762652580.3145778", + "retrieved_timestamp": "1762652580.3145778", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", + "developer": "leafspark", + "inference_platform": "unknown", + "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7125382872999197 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5009088261495708 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37242353723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json new file mode 100644 index 000000000..03be257dc --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3B/1762652580.319232", + "retrieved_timestamp": "1762652580.319233", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-EIFFEL-3B", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-EIFFEL-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3469405621528655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5101583259186949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43622916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3820644946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json new file mode 100644 index 000000000..fb79e9eba --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv2/1762652580.319594", + "retrieved_timestamp": "1762652580.319595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-EIFFEL-3Bv2", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-EIFFEL-3Bv2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30130276555096036 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5424007873371969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4442916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39993351063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json new file mode 100644 index 000000000..f42d21d39 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv3/1762652580.319853", + "retrieved_timestamp": "1762652580.319854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-EIFFEL-3Bv3", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-EIFFEL-3Bv3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3786142989490109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5469446669064592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46751041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39752327127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json new file mode 100644 index 000000000..32407a1d1 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V1/1762652580.320159", + "retrieved_timestamp": "1762652580.32016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2932840418977203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5340594627933309 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45951041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900432180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json new file mode 100644 index 000000000..7283b6672 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V2/1762652580.320386", + "retrieved_timestamp": "1762652580.3203871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2932840418977203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5340594627933309 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45951041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900432180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json new file mode 100644 index 000000000..f45bbb7a0 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/1762652580.320611", + "retrieved_timestamp": "1762652580.3206122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649006857360692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411447467732948 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16767371601208458 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4661458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3990192819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json new file mode 100644 index 000000000..8cf1fe49d --- /dev/null +++ b/data/hfopenllm_v2/lesubra/ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/1762652580.320825", + "retrieved_timestamp": "1762652580.320826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3664244205375071 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5411447467732948 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16767371601208458 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4661458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3990192819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json b/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json new file mode 100644 index 000000000..70d8ede22 --- /dev/null +++ b/data/hfopenllm_v2/lesubra/merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lesubra_merge-test/1762652580.321054", + "retrieved_timestamp": "1762652580.321055", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lesubra/merge-test", + "developer": "lesubra", + "inference_platform": "unknown", + "id": "lesubra/merge-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538257379309122 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5240434385320306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44190625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38738364361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json b/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json new file mode 100644 index 000000000..689cbf795 --- /dev/null +++ b/data/hfopenllm_v2/lkoenig/BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_145_/1762652580.322459", + "retrieved_timestamp": "1762652580.32246", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lkoenig/BBAI_145_", + "developer": "lkoenig", + "inference_platform": "unknown", + "id": "lkoenig/BBAI_145_" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44503473007176514 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5567169940219221 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4382083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.448969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json new file mode 100644 index 000000000..20f39839c --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/1762652580.3253949", + "retrieved_timestamp": "1762652580.325396", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", + "developer": "llnYou", + "inference_platform": "unknown", + "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33125329680802496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42329545804357255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3868020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29305186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json new file mode 100644 index 000000000..c367e488d --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/1762652580.325702", + "retrieved_timestamp": "1762652580.325703", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", + "developer": "llnYou", + "inference_platform": "unknown", + "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13876181864120535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3944027089700251 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39279166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2349567819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.357 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json new file mode 100644 index 000000000..86a6830d8 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/1762652580.325917", + "retrieved_timestamp": "1762652580.325917", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", + "developer": "llnYou", + "inference_platform": "unknown", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23463299600615256 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4018418465179459 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3364479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2849900265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json new file mode 100644 index 000000000..832d61d83 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/1762652580.326129", + "retrieved_timestamp": "1762652580.326129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", + "developer": "llnYou", + "inference_platform": "unknown", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2309361383351729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39897709281426197 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3587708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28997672872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.81 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json new file mode 100644 index 000000000..49e14aff0 --- /dev/null +++ b/data/hfopenllm_v2/llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/1762652580.326333", + "retrieved_timestamp": "1762652580.326334", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", + "developer": "llnYou", + "inference_platform": "unknown", + "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35808100285021516 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5473121918055145 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43613541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40433843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json b/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json new file mode 100644 index 000000000..e389ddb87 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-13b-v1.3/1762652580.3265438", + "retrieved_timestamp": "1762652580.326545", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lmsys/vicuna-13b-v1.3", + "developer": "lmsys", + "inference_platform": "unknown", + "id": "lmsys/vicuna-13b-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3343506340953115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3384399312777569 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3727291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243184840425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json new file mode 100644 index 000000000..f23754b10 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.3/1762652580.326798", + "retrieved_timestamp": "1762652580.3267992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lmsys/vicuna-7b-v1.3", + "developer": "lmsys", + "inference_platform": "unknown", + "id": "lmsys/vicuna-7b-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29086158060612505 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298410006592924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18375997340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json new file mode 100644 index 000000000..3e212ae23 --- /dev/null +++ b/data/hfopenllm_v2/lmsys/vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.5/1762652580.327009", + "retrieved_timestamp": "1762652580.3270102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lmsys/vicuna-7b-v1.5", + "developer": "lmsys", + "inference_platform": "unknown", + "id": "lmsys/vicuna-7b-v1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23515716077784724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39470436842233775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42311458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21467752659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json b/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json new file mode 100644 index 000000000..5275e4de3 --- /dev/null +++ b/data/hfopenllm_v2/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/1762652580.327225", + "retrieved_timestamp": "1762652580.3272262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", + "developer": "lodrick-the-lafted", + "inference_platform": "unknown", + "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3514618988727687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39069140261362917 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36159375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1973902925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json b/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json new file mode 100644 index 000000000..502d472c4 --- /dev/null +++ b/data/hfopenllm_v2/lordjia/Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lordjia_Llama-3-Cantonese-8B-Instruct/1762652580.3274932", + "retrieved_timestamp": "1762652580.3274932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lordjia/Llama-3-Cantonese-8B-Instruct", + "developer": "lordjia", + "inference_platform": "unknown", + "id": "lordjia/Llama-3-Cantonese-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6669259786256023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4814148018954038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40460416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35147938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json b/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json new file mode 100644 index 000000000..29f706780 --- /dev/null +++ b/data/hfopenllm_v2/lordjia/Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lordjia_Qwen2-Cantonese-7B-Instruct/1762652580.3277462", + "retrieved_timestamp": "1762652580.3277462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lordjia/Qwen2-Cantonese-7B-Instruct", + "developer": "lordjia", + "inference_platform": "unknown", + "id": "lordjia/Qwen2-Cantonese-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435278394659503 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215311346221223 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25604229607250756 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40038541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38430851063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json b/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json new file mode 100644 index 000000000..18964697f --- /dev/null +++ b/data/hfopenllm_v2/lt-asset/nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lt-asset_nova-1.3b/1762652580.3279538", + "retrieved_timestamp": "1762652580.327955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lt-asset/nova-1.3b", + "developer": "lt-asset", + "inference_platform": "unknown", + "id": "lt-asset/nova-1.3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1214255951985177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31700122104895806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NovaForCausalLM", + "params_billions": 1.347 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json b/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json new file mode 100644 index 000000000..6878b1ae4 --- /dev/null +++ b/data/hfopenllm_v2/lunahr/thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lunahr_thea-3b-50r-u1/1762652580.328209", + "retrieved_timestamp": "1762652580.328209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lunahr/thea-3b-50r-u1", + "developer": "lunahr", + "inference_platform": "unknown", + "id": "lunahr/thea-3b-50r-u1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6030288523340293 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41046731029294475 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2808344414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json b/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json new file mode 100644 index 000000000..781f5f567 --- /dev/null +++ b/data/hfopenllm_v2/lunahr/thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lunahr_thea-v2-3b-50r/1762652580.328458", + "retrieved_timestamp": "1762652580.328459", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lunahr/thea-v2-3b-50r", + "developer": "lunahr", + "inference_platform": "unknown", + "id": "lunahr/thea-v2-3b-50r" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370396104558128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4194416192911743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2409408244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json new file mode 100644 index 000000000..28187aca6 --- /dev/null +++ b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-base/1762652580.32929", + "retrieved_timestamp": "1762652580.329291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "macadeliccc/magistrate-3.2-3b-base", + "developer": "macadeliccc", + "inference_platform": "unknown", + "id": "macadeliccc/magistrate-3.2-3b-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1159301763764589 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3342701056047533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39759374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16888297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json new file mode 100644 index 000000000..9b5a576d9 --- /dev/null +++ b/data/hfopenllm_v2/macadeliccc/magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-it/1762652580.329552", + "retrieved_timestamp": "1762652580.329552", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "macadeliccc/magistrate-3.2-3b-it", + "developer": "macadeliccc", + "inference_platform": "unknown", + "id": "macadeliccc/magistrate-3.2-3b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22918744486850445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3256506790327196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3763229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15924202127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json b/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json new file mode 100644 index 000000000..e61e74838 --- /dev/null +++ b/data/hfopenllm_v2/maldv/Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_Awqward2.5-32B-Instruct/1762652580.3302772", + "retrieved_timestamp": "1762652580.330278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/Awqward2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "id": "maldv/Awqward2.5-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8254697535871487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6974465506773041 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6231117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42748958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5723071808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json b/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json new file mode 100644 index 000000000..9ca83a8e1 --- /dev/null +++ b/data/hfopenllm_v2/maldv/Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_Lytta2.5-32B-Instruct/1762652580.3306072", + "retrieved_timestamp": "1762652580.3306088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/Lytta2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "id": "maldv/Lytta2.5-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25079455843827714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.559971089357847 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441087613293053 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37685416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048204787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json b/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json new file mode 100644 index 000000000..1441b4a49 --- /dev/null +++ b/data/hfopenllm_v2/maldv/Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_Qwentile2.5-32B-Instruct/1762652580.3309162", + "retrieved_timestamp": "1762652580.3309171", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/Qwentile2.5-32B-Instruct", + "developer": "maldv", + "inference_platform": "unknown", + "id": "maldv/Qwentile2.5-32B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7393161256576994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6962837451098368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219033232628398 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4682291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5879321808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json new file mode 100644 index 000000000..373f2f6a4 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-1/1762652580.332221", + "retrieved_timestamp": "1762652580.332222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Cheng-1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/Cheng-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7788833628106757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5524677845280024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48942598187311176 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4073333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492353723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json new file mode 100644 index 000000000..9427deddf --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2-v1.1/1762652580.332704", + "retrieved_timestamp": "1762652580.332705", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Cheng-2-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/Cheng-2-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8269934883885868 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6510142192324059 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5392749244712991 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41672916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5076462765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json b/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json new file mode 100644 index 000000000..d11a28143 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2/1762652580.332486", + "retrieved_timestamp": "1762652580.3324869", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/Cheng-2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/Cheng-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8337378156624423 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6498988582965893 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5438066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5013297872340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json b/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json new file mode 100644 index 000000000..4c67e18b5 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_absolute-o1-7b/1762652580.335638", + "retrieved_timestamp": "1762652580.335639", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/absolute-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/absolute-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7515558717536137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5469413884153854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5083081570996979 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4113645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json new file mode 100644 index 000000000..0f8ff93f6 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-2-28-2025/1762652580.3360791", + "retrieved_timestamp": "1762652580.3360798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursa-o1-7b-2-28-2025", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursa-o1-7b-2-28-2025" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7467098409996586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538413713363387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4811178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42733333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4365026595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json new file mode 100644 index 000000000..718dbed43 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.1/1762652580.336299", + "retrieved_timestamp": "1762652580.3363001", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursa-o1-7b-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursa-o1-7b-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527549125209998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5492557305346194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43916223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json new file mode 100644 index 000000000..ac1796aae --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/1762652580.3365178", + "retrieved_timestamp": "1762652580.3365178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7615726272955757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5492349810703803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49924471299093653 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4272708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4435671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json new file mode 100644 index 000000000..946d73713 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b/1762652580.335863", + "retrieved_timestamp": "1762652580.335863", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursa-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursa-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7628215357473725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465860023973769 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954682779456193 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4300625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4392453457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json b/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json new file mode 100644 index 000000000..5f4c75f1b --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursor-o1-7b/1762652580.3367229", + "retrieved_timestamp": "1762652580.336724", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursor-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursor-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4106880853912065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007453242508472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14123867069486404 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41009375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32513297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json b/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json new file mode 100644 index 000000000..5f8e6d978 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursorr-o1.2-7b/1762652580.336929", + "retrieved_timestamp": "1762652580.336929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/cursorr-o1.2-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/cursorr-o1.2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1659895743294459 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3068134113454804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10804521276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json new file mode 100644 index 000000000..647200d3b --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.1/1762652580.337136", + "retrieved_timestamp": "1762652580.337137", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/etr1o-explicit-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/etr1o-explicit-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28803906966847964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31316553135589525 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4110520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11951462765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json new file mode 100644 index 000000000..a6ad6a89d --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.2/1762652580.337388", + "retrieved_timestamp": "1762652580.337389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/etr1o-explicit-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/etr1o-explicit-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1504020443534267 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29497368605886115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40311458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json new file mode 100644 index 000000000..8fcba36be --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.1/1762652580.3376079", + "retrieved_timestamp": "1762652580.337609", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/etr1o-v1.1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/etr1o-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15971954414287426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31003625778742805 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11569148936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json new file mode 100644 index 000000000..ea1c66d2e --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.2/1762652580.337824", + "retrieved_timestamp": "1762652580.337825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/etr1o-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/etr1o-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7286998497320443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6349035922791185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35876132930513593 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4714479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5315824468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json b/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json new file mode 100644 index 000000000..30bdf8c39 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_fan-o1-7b/1762652580.338023", + "retrieved_timestamp": "1762652580.338024", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/fan-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/fan-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4455588948434598 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4849058892394324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3273769946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json b/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json new file mode 100644 index 000000000..93e4d4576 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-7b/1762652580.338225", + "retrieved_timestamp": "1762652580.338225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/olmner-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/olmner-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7253775537795273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5471591805569388 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json b/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json new file mode 100644 index 000000000..e93c1e6ba --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-della-7b/1762652580.338445", + "retrieved_timestamp": "1762652580.3384461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/olmner-della-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/olmner-della-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7636958824807067 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5491231851969524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43858045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json b/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json new file mode 100644 index 000000000..e4c5d49d7 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-o1-7b/1762652580.338658", + "retrieved_timestamp": "1762652580.338659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/olmner-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/olmner-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527549125209998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480873056178129 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42990625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43858045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json b/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json new file mode 100644 index 000000000..c504bf41a --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-sbr-7b/1762652580.338864", + "retrieved_timestamp": "1762652580.3388648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/olmner-sbr-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/olmner-sbr-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7600488924941378 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5461642048146724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4947129909365559 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4153645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4412400265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json b/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json new file mode 100644 index 000000000..3355eecf6 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_post-cursa-o1/1762652580.3390641", + "retrieved_timestamp": "1762652580.339065", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/post-cursa-o1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/post-cursa-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7628215357473725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5479692437233474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4871601208459215 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43514583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4360871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json new file mode 100644 index 000000000..b4a52d66a --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.2/1762652580.339467", + "retrieved_timestamp": "1762652580.339468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/pre-cursa-o1-v1.2", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/pre-cursa-o1-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7548781677061308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5486788313377599 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.506797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42723958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4402426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json new file mode 100644 index 000000000..8b5057118 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.3/1762652580.339683", + "retrieved_timestamp": "1762652580.339684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/pre-cursa-o1-v1.3", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/pre-cursa-o1-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7506815250202795 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5454519705653261 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5075528700906344 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42714583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4419880319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json new file mode 100644 index 000000000..5f3d81adc --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.4/1762652580.3398788", + "retrieved_timestamp": "1762652580.33988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/pre-cursa-o1-v1.4", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/pre-cursa-o1-v1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.748783228500379 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5493014138981462 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48338368580060426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42851041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4435671542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json new file mode 100644 index 000000000..e3ebbc58c --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.6/1762652580.340074", + "retrieved_timestamp": "1762652580.340075", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/pre-cursa-o1-v1.6", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/pre-cursa-o1-v1.6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527549125209998 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5473342320067097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4233645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json new file mode 100644 index 000000000..fcc9e10f4 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1/1762652580.3392608", + "retrieved_timestamp": "1762652580.339262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/pre-cursa-o1", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/pre-cursa-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.740889728143548 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5461688442794247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5037764350453172 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42596875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424035904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json b/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json new file mode 100644 index 000000000..3aeefb5bf --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_r1o-et/1762652580.340277", + "retrieved_timestamp": "1762652580.340277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/r1o-et", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/r1o-et" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3596800932636516 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42092007019831174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2579787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json b/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json new file mode 100644 index 000000000..084281100 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_sbr-o1-7b/1762652580.340477", + "retrieved_timestamp": "1762652580.340478", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/sbr-o1-7b", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/sbr-o1-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7454609325478618 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5478826565229475 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4404166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43550531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json b/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json new file mode 100644 index 000000000..b087ed087 --- /dev/null +++ b/data/hfopenllm_v2/marcuscedricridia/stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/marcuscedricridia_stray-r1o-et/1762652580.340682", + "retrieved_timestamp": "1762652580.340683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "marcuscedricridia/stray-r1o-et", + "developer": "marcuscedricridia", + "inference_platform": "unknown", + "id": "marcuscedricridia/stray-r1o-et" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15622215720953736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2967459956151434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4085729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json new file mode 100644 index 000000000..88ab5e6e2 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/1762652580.340896", + "retrieved_timestamp": "1762652580.340897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18732186154957736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3239117424825444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37520833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17195811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json new file mode 100644 index 000000000..776ef5db4 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/1762652580.3411388", + "retrieved_timestamp": "1762652580.34114", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18732186154957736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3239117424825444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37520833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17195811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json new file mode 100644 index 000000000..ad9bbdc95 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/1762652580.341354", + "retrieved_timestamp": "1762652580.341354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18732186154957736 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3239117424825444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37520833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17195811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json new file mode 100644 index 000000000..d3388eca3 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/1762652580.341564", + "retrieved_timestamp": "1762652580.341565", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18824607596732226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32327887380902803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17204122340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json new file mode 100644 index 000000000..453294a96 --- /dev/null +++ b/data/hfopenllm_v2/matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652580.3417778", + "retrieved_timestamp": "1762652580.341779", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", + "developer": "matouLeLoup", + "inference_platform": "unknown", + "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16521496296493304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30237295164613204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42730208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1116190159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 0.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json b/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json new file mode 100644 index 000000000..083a79de4 --- /dev/null +++ b/data/hfopenllm_v2/mattshumer/ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mattshumer_ref_70_e3/1762652580.342239", + "retrieved_timestamp": "1762652580.34224", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mattshumer/ref_70_e3", + "developer": "mattshumer", + "inference_platform": "unknown", + "id": "mattshumer/ref_70_e3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6294321289733462 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6500839481104265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4327604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302526595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json new file mode 100644 index 000000000..ca210f3d7 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-Instruct/1762652580.343025", + "retrieved_timestamp": "1762652580.343026", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-1B-Instruct", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6412973133507981 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34738999022447486 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35136458333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17810837765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaMedITForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json new file mode 100644 index 000000000..670d1357f --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-chat/1762652580.343276", + "retrieved_timestamp": "1762652580.343277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-1B-chat", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-1B-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5481743994822625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35144575516411386 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18375997340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json new file mode 100644 index 000000000..c42f944fd --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.5B-chat/1762652580.344106", + "retrieved_timestamp": "1762652580.344107", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-2.5B-chat", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.560414145578177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3574734302161124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3155208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1813497340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.472 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json new file mode 100644 index 000000000..5b9c5c9b8 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/1762652580.344357", + "retrieved_timestamp": "1762652580.344363", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6826631116548536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3507731670753292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3593645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16871675531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json b/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json new file mode 100644 index 000000000..e5654b0a7 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/1762652580.344661", + "retrieved_timestamp": "1762652580.344662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36550020611976225 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4034845834509661 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42534374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21899933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.646 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json b/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json new file mode 100644 index 000000000..5d2511858 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/1762652580.344883", + "retrieved_timestamp": "1762652580.344884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5814217387642566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5671722290858499 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20770392749244712 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43845833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3499833776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json b/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json new file mode 100644 index 000000000..410842fad --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_MedIT-Mesh-3B-Instruct/1762652580.345099", + "retrieved_timestamp": "1762652580.345099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/MedIT-Mesh-3B-Instruct", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/MedIT-Mesh-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5814217387642566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5575523356865378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4011801861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json b/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json new file mode 100644 index 000000000..c7a78fc10 --- /dev/null +++ b/data/hfopenllm_v2/meditsolutions/SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_SmolLM2-MedIT-Upscale-2B/1762652580.3453178", + "retrieved_timestamp": "1762652580.3453188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/SmolLM2-MedIT-Upscale-2B", + "developer": "meditsolutions", + "inference_platform": "unknown", + "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6429207835210575 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3551122445928012 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33136458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19705784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.114 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json b/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json new file mode 100644 index 000000000..688a6031c --- /dev/null +++ b/data/hfopenllm_v2/meetkai/functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meetkai_functionary-small-v3.1/1762652580.345532", + "retrieved_timestamp": "1762652580.345533", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meetkai/functionary-small-v3.1", + "developer": "meetkai", + "inference_platform": "unknown", + "id": "meetkai/functionary-small-v3.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6274584768414474 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4981781042779377 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15709969788519637 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33485704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json b/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json new file mode 100644 index 000000000..ed9d0eed8 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652580.346048", + "retrieved_timestamp": "1762652580.346048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49406907006742107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.436971949757697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36562500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3032746010638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json b/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json new file mode 100644 index 000000000..319f5b7f7 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_VirtuosoSmall-InstructModelStock/1762652580.346572", + "retrieved_timestamp": "1762652580.346573", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/VirtuosoSmall-InstructModelStock", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/VirtuosoSmall-InstructModelStock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5237946426592552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517899193567194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4755729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420545212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json b/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json new file mode 100644 index 000000000..57ae9775d --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_diabolic6045_ELN-AOC-CAIN/1762652580.346791", + "retrieved_timestamp": "1762652580.346791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/diabolic6045_ELN-AOC-CAIN", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0861547361002141 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31256779393862577 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11909906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json b/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json new file mode 100644 index 000000000..a930941d5 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-dare_ties-ajgjgea/1762652580.347229", + "retrieved_timestamp": "1762652580.34723", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-dare_ties-ajgjgea", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-dare_ties-ajgjgea" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5263423272472595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3494703687455365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3289166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17436835106382978 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json b/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json new file mode 100644 index 000000000..0dc3db466 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-della-zgowfmf/1762652580.347496", + "retrieved_timestamp": "1762652580.347497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-della-zgowfmf", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-della-zgowfmf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4827535383892516 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6590790528029254 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178247734138974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3901006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4833854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414727393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json b/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json new file mode 100644 index 000000000..dc700904d --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-model_stock-azgztvm/1762652580.347734", + "retrieved_timestamp": "1762652580.347735", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-model_stock-azgztvm", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-model_stock-azgztvm" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5061592131101034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6542775546755846 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47300000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5405585106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json b/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json new file mode 100644 index 000000000..64b3b9819 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-slerp-fmrazcr/1762652580.3479838", + "retrieved_timestamp": "1762652580.3479848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-slerp-fmrazcr", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-slerp-fmrazcr" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41743241266506204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5341624678276029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41045833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776595744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json new file mode 100644 index 000000000..a1de47f42 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-rraxdhv/1762652580.348219", + "retrieved_timestamp": "1762652580.3482199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-ties-rraxdhv", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-ties-rraxdhv" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11230756614671294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5183590984128971 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42019791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39095744680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json b/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json new file mode 100644 index 000000000..d92dcb280 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-ykqemwr/1762652580.3485382", + "retrieved_timestamp": "1762652580.3485398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/mergekit-ties-ykqemwr", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/mergekit-ties-ykqemwr" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35995491961329273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5455496677885336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12235649546827794 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3734208776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json b/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json new file mode 100644 index 000000000..8ea2aa006 --- /dev/null +++ b/data/hfopenllm_v2/mergekit-community/sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mergekit-community_sexeh_time_testing/1762652580.348824", + "retrieved_timestamp": "1762652580.348825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mergekit-community/sexeh_time_testing", + "developer": "mergekit-community", + "inference_platform": "unknown", + "id": "mergekit-community/sexeh_time_testing" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7329463601023063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241321549202608 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36190625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36668882978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json b/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json new file mode 100644 index 000000000..5ddce8857 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-chat-hf/1762652580.34908", + "retrieved_timestamp": "1762652580.349081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-13b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-13b-chat-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.398472719052115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33427367066714186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23154362416107382 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40072916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19232047872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json b/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json new file mode 100644 index 000000000..15050cd07 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-chat-hf/1762652580.3497758", + "retrieved_timestamp": "1762652580.349777", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-70b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-70b-chat-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49579227560650185 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30424741461642657 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2432679521276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json b/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json new file mode 100644 index 000000000..00ec9e8f8 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-chat-hf/1762652580.350235", + "retrieved_timestamp": "1762652580.350236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-7b-chat-hf", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-7b-chat-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3986478100329348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3113546355002185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3675520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16879986702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json new file mode 100644 index 000000000..db2fc26c5 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B-Instruct/1762652580.35089", + "retrieved_timestamp": "1762652580.350891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.1-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.1-70B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8668854195756149 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6917287453663654 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806646525679758 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45806250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5309175531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json new file mode 100644 index 000000000..ca2462223 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B-Instruct/1762652580.351296", + "retrieved_timestamp": "1762652580.3512971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.1-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4921707735475206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5087032184331889 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39715625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37982047872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json new file mode 100644 index 000000000..1c9da2218 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B-Instruct/1762652580.351711", + "retrieved_timestamp": "1762652580.351712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.2-1B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.2-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5698313807364459 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34968498061768266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3328541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16821808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.24 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json new file mode 100644 index 000000000..6e2e49838 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B-Instruct/1762652580.352124", + "retrieved_timestamp": "1762652580.352124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.2-3B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.2-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7393161256576994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610070239466069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3194813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json b/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json new file mode 100644 index 000000000..24d045745 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.3-70B-Instruct/1762652580.352333", + "retrieved_timestamp": "1762652580.352334", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.3-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.3-70B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8997581971391464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6919312828325811 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48338368580060426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44612500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5331615691489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json new file mode 100644 index 000000000..45811e39d --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B-Instruct/1762652580.352748", + "retrieved_timestamp": "1762652580.352749", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Meta-Llama-3-70B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Meta-Llama-3-70B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8099077115387172 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6546699432372051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4153645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206948138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json new file mode 100644 index 000000000..991fe36ef --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353369", + "retrieved_timestamp": "1762652580.353369", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Meta-Llama-3-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47823220166934843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4910264175128683 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3805416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359125664893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json new file mode 100644 index 000000000..da6a7c252 --- /dev/null +++ b/data/hfopenllm_v2/meta-llama/Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353163", + "retrieved_timestamp": "1762652580.353164", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Meta-Llama-3-8B-Instruct", + "developer": "meta-llama", + "inference_platform": "unknown", + "id": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7408398604591373 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49887111136169526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3568229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3664394946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json new file mode 100644 index 000000000..7107f8a8c --- /dev/null +++ b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/1762652579.470921", + "retrieved_timestamp": "1762652579.470922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2892381104358657 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33427703119251256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json new file mode 100644 index 000000000..6a1e9e80c --- /dev/null +++ b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/1762652579.47045", + "retrieved_timestamp": "1762652579.4704509", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", + "developer": "meta", + "inference_platform": "unknown", + "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22134381219608418 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34609423326328875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26586102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3089166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17495013297872342 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json new file mode 100644 index 000000000..d1a9df305 --- /dev/null +++ b/data/hfopenllm_v2/meta/3rd-Degree-Burn/Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-Squared-8B/1762652579.471144", + "retrieved_timestamp": "1762652579.471145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "3rd-Degree-Burn/Llama-Squared-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "3rd-Degree-Burn/Llama-Squared-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27552449722292405 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4431025683868353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30894791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2366190159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json b/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json new file mode 100644 index 000000000..213ce77b1 --- /dev/null +++ b/data/hfopenllm_v2/meta/AGI-0/Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AGI-0_Artificium-llama3.1-8B-001/1762652579.4738402", + "retrieved_timestamp": "1762652579.473841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AGI-0/Artificium-llama3.1-8B-001", + "developer": "meta", + "inference_platform": "unknown", + "id": "AGI-0/Artificium-llama3.1-8B-001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5247687247614108 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42562150225923556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13595166163141995 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181515957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json b/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json new file mode 100644 index 000000000..3af95a3ed --- /dev/null +++ b/data/hfopenllm_v2/meta/AGI-0/smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AGI-0_smartllama3.1-8B-001/1762652579.4741051", + "retrieved_timestamp": "1762652579.474106", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AGI-0/smartllama3.1-8B-001", + "developer": "meta", + "inference_platform": "unknown", + "id": "AGI-0/smartllama3.1-8B-001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35178659290682057 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46701787510868176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43864583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486535904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json b/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json new file mode 100644 index 000000000..b7cc0d1bb --- /dev/null +++ b/data/hfopenllm_v2/meta/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1762652579.4817438", + "retrieved_timestamp": "1762652579.481745", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6359016298975606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5015613456039083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3576875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35513630319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json b/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json new file mode 100644 index 000000000..1333fff9f --- /dev/null +++ b/data/hfopenllm_v2/meta/Azure99/blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-llama3-8b/1762652579.486878", + "retrieved_timestamp": "1762652579.486878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Azure99/blossom-v5-llama3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Azure99/blossom-v5-llama3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.434293230849701 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4184909197087261 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2205784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json b/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json new file mode 100644 index 000000000..915c16325 --- /dev/null +++ b/data/hfopenllm_v2/meta/BEE-spoke-data/Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_Meta-Llama-3-8Bee/1762652579.491223", + "retrieved_timestamp": "1762652579.491224", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/Meta-Llama-3-8Bee", + "developer": "meta", + "inference_platform": "unknown", + "id": "BEE-spoke-data/Meta-Llama-3-8Bee" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19506575885317623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46263641905752745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32197473404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json new file mode 100644 index 000000000..c29feed8c --- /dev/null +++ b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-101M-GQA/1762652579.491745", + "retrieved_timestamp": "1762652579.491746", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/smol_llama-101M-GQA", + "developer": "meta", + "inference_platform": "unknown", + "id": "BEE-spoke-data/smol_llama-101M-GQA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13843712460715346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017560771912554 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3712708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11070478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.101 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json new file mode 100644 index 000000000..b095d6fba --- /dev/null +++ b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/1762652579.492168", + "retrieved_timestamp": "1762652579.492168", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", + "developer": "meta", + "inference_platform": "unknown", + "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19881248420856662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29290517164510593 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4367604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json new file mode 100644 index 000000000..829fc1092 --- /dev/null +++ b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA/1762652579.491959", + "retrieved_timestamp": "1762652579.49196", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/smol_llama-220M-GQA", + "developer": "meta", + "inference_platform": "unknown", + "id": "BEE-spoke-data/smol_llama-220M-GQA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23860468002677343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30316731388708956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.405875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1149434840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json new file mode 100644 index 000000000..e5e37fc1b --- /dev/null +++ b/data/hfopenllm_v2/meta/BEE-spoke-data/smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-openhermes/1762652579.4923809", + "retrieved_timestamp": "1762652579.492382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BEE-spoke-data/smol_llama-220M-openhermes", + "developer": "meta", + "inference_platform": "unknown", + "id": "BEE-spoke-data/smol_llama-220M-openhermes" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555229014570229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30275191401927726 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3847291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11203457446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.218 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json b/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json new file mode 100644 index 000000000..16b631a60 --- /dev/null +++ b/data/hfopenllm_v2/meta/Ba2han/Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Ba2han_Llama-Phi-3_DoRA/1762652579.4940102", + "retrieved_timestamp": "1762652579.494011", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Ba2han/Llama-Phi-3_DoRA", + "developer": "meta", + "inference_platform": "unknown", + "id": "Ba2han/Llama-Phi-3_DoRA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5130531434371911 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5514558259029191 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40692708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39153922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json b/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json new file mode 100644 index 000000000..9444d4b99 --- /dev/null +++ b/data/hfopenllm_v2/meta/BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/1762652579.495604", + "retrieved_timestamp": "1762652579.495605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23158552640327662 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3453848032699584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37781249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1332280585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 20.245 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json new file mode 100644 index 000000000..996a3766d --- /dev/null +++ b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-8B/1762652579.496156", + "retrieved_timestamp": "1762652579.496157", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Neos-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "BlackBeenie/Neos-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49439376410147295 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424998411442879 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3749895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json new file mode 100644 index 000000000..92ba31115 --- /dev/null +++ b/data/hfopenllm_v2/meta/BlackBeenie/Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-base/1762652579.496382", + "retrieved_timestamp": "1762652579.496383", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Neos-Llama-3.1-base", + "developer": "meta", + "inference_platform": "unknown", + "id": "BlackBeenie/Neos-Llama-3.1-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17508211545366295 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29303397468240516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34990625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11120345744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json b/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json new file mode 100644 index 000000000..e09f3f47f --- /dev/null +++ b/data/hfopenllm_v2/meta/BlackBeenie/llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3-luminous-merged/1762652579.496879", + "retrieved_timestamp": "1762652579.49688", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/llama-3-luminous-merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "BlackBeenie/llama-3-luminous-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43234506664538974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5153924501559338 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4148958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3773271276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json b/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json new file mode 100644 index 000000000..2a9086614 --- /dev/null +++ b/data/hfopenllm_v2/meta/BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/1762652579.4970949", + "retrieved_timestamp": "1762652579.4970958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", + "developer": "meta", + "inference_platform": "unknown", + "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634842218646525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5213365363748029 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44062500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32064494680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json b/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json new file mode 100644 index 000000000..060ac7d5b --- /dev/null +++ b/data/hfopenllm_v2/meta/Bllossom/llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/1762652579.497314", + "retrieved_timestamp": "1762652579.497314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5172497861230424 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42930745041520607 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27102726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 5.199 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json b/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json new file mode 100644 index 000000000..dcc96dc5c --- /dev/null +++ b/data/hfopenllm_v2/meta/BrainWave-ML/llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BrainWave-ML_llama3.2-3B-maths-orpo/1762652579.499409", + "retrieved_timestamp": "1762652579.49941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BrainWave-ML/llama3.2-3B-maths-orpo", + "developer": "meta", + "inference_platform": "unknown", + "id": "BrainWave-ML/llama3.2-3B-maths-orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20490742341431845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json b/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json new file mode 100644 index 000000000..f16b88993 --- /dev/null +++ b/data/hfopenllm_v2/meta/CYFRAGOVPL/Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-base/1762652579.500559", + "retrieved_timestamp": "1762652579.5005598", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CYFRAGOVPL/Llama-PLLuM-8B-base", + "developer": "meta", + "inference_platform": "unknown", + "id": "CYFRAGOVPL/Llama-PLLuM-8B-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28988749850396944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43204480458140976 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39703125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27568151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json b/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json new file mode 100644 index 000000000..7d97c70f3 --- /dev/null +++ b/data/hfopenllm_v2/meta/Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/1762652579.5080209", + "retrieved_timestamp": "1762652579.508022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679938119744496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5023929881802022 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3152426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json b/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json new file mode 100644 index 000000000..612bec596 --- /dev/null +++ b/data/hfopenllm_v2/meta/ContactDoctor/Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-Llama-3-8B/1762652579.510189", + "retrieved_timestamp": "1762652579.510189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ContactDoctor/Bio-Medical-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "ContactDoctor/Bio-Medical-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422365988909427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.486311802622738 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36477726063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json b/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json new file mode 100644 index 000000000..a5af340b3 --- /dev/null +++ b/data/hfopenllm_v2/meta/Corianas/llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Corianas_llama-3-reactor/1762652579.5122728", + "retrieved_timestamp": "1762652579.512274", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Corianas/llama-3-reactor", + "developer": "meta", + "inference_platform": "unknown", + "id": "Corianas/llama-3-reactor" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23001192391742797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457148560545015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39771874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2800864361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": -1.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json b/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json new file mode 100644 index 000000000..6d34aea38 --- /dev/null +++ b/data/hfopenllm_v2/meta/CreitinGameplays/Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/CreitinGameplays_Llama-3.1-8B-R1-v0.1/1762652579.514677", + "retrieved_timestamp": "1762652579.514678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.323485019747603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3057485865545513 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18126888217522658 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36215624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12516622340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json b/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json new file mode 100644 index 000000000..0635f0137 --- /dev/null +++ b/data/hfopenllm_v2/meta/Daemontatox/Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Llama3.3-70B-CogniLink/1762652579.527427", + "retrieved_timestamp": "1762652579.5274282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Llama3.3-70B-CogniLink", + "developer": "meta", + "inference_platform": "unknown", + "id": "Daemontatox/Llama3.3-70B-CogniLink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6931042965996888 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.666832775829349 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44546979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4876979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5172872340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json b/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json new file mode 100644 index 000000000..95fdfaf32 --- /dev/null +++ b/data/hfopenllm_v2/meta/Daemontatox/Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Llama_cot/1762652579.527702", + "retrieved_timestamp": "1762652579.527703", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Llama_cot", + "developer": "meta", + "inference_platform": "unknown", + "id": "Daemontatox/Llama_cot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7548781677061308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4838374335391873 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.351811835106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json b/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json new file mode 100644 index 000000000..4b2415743 --- /dev/null +++ b/data/hfopenllm_v2/meta/Danielbrdz/Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-Llama3-8b-ORPO/1762652579.534392", + "retrieved_timestamp": "1762652579.534392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-Llama3-8b-ORPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-Llama3-8b-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.737242738156979 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49865578559911244 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4189583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3829787234042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json b/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json new file mode 100644 index 000000000..d229f6a75 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/1762652579.537201", + "retrieved_timestamp": "1762652579.537202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31356799957446246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4762231983114653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39278125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3208942819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json new file mode 100644 index 000000000..e5818dd9b --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/1762652579.5376909", + "retrieved_timestamp": "1762652579.537696", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36849780803822746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.488693862545088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43197916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2976230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json new file mode 100644 index 000000000..efb688051 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/1762652579.538059", + "retrieved_timestamp": "1762652579.53806", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2506948230694557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44878062698346727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41644791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709441489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 15.664 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json new file mode 100644 index 000000000..90404d824 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1762652579.5383239", + "retrieved_timestamp": "1762652579.538326", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882564927725103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48860331670972784 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30244348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json new file mode 100644 index 000000000..df37c4b3f --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/1762652579.538624", + "retrieved_timestamp": "1762652579.538625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3436182662003484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47693843531787744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4230833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29695811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json new file mode 100644 index 000000000..7e1e592c0 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/1762652579.539129", + "retrieved_timestamp": "1762652579.539129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2853162940996556 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44623832540838126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2777593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 16.537 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json b/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json new file mode 100644 index 000000000..e981e965d --- /dev/null +++ b/data/hfopenllm_v2/meta/DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/1762652579.5393531", + "retrieved_timestamp": "1762652579.539354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793135547015253 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232300476265338 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3559791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2720246010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 18.405 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json new file mode 100644 index 000000000..17bdfefed --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.5443351", + "retrieved_timestamp": "1762652579.5443368", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15067687070306784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29300816789978756 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253324468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json new file mode 100644 index 000000000..f2fb7966e --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.544659", + "retrieved_timestamp": "1762652579.5446599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15492338107332987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29372614029730437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json new file mode 100644 index 000000000..f231a16b2 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter1/1762652579.5448809", + "retrieved_timestamp": "1762652579.5448818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter1", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15754642127333254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29402546232087917 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json new file mode 100644 index 000000000..3ebe8bb64 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter2/1762652579.545113", + "retrieved_timestamp": "1762652579.545114", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter2", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13761264555822994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2980340303779312 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444108 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35530208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11286569148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json new file mode 100644 index 000000000..0424479ac --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.54562", + "retrieved_timestamp": "1762652579.545621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1323920530858123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29722352809482616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3526666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11286569148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json new file mode 100644 index 000000000..16ec88835 --- /dev/null +++ b/data/hfopenllm_v2/meta/DavieLion/Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.5453749", + "retrieved_timestamp": "1762652579.545376", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "developer": "meta", + "inference_platform": "unknown", + "id": "DavieLion/Llama-3.2-1B-SPIN-iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1335910938531984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29752276438021447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34996875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json new file mode 100644 index 000000000..3db9c335d --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.1-8B-Inst/1762652579.547036", + "retrieved_timestamp": "1762652579.5470378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7794828831943688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.511742159482904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20090634441087613 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3909583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.379155585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json new file mode 100644 index 000000000..068803d82 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst/1762652579.5474088", + "retrieved_timestamp": "1762652579.547411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5648856146136695 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35048085637770016 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31834375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18085106382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json new file mode 100644 index 000000000..e3017f72f --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/1762652579.547727", + "retrieved_timestamp": "1762652579.5477278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5597148898256625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33650903200352716 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3103125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18035239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json new file mode 100644 index 000000000..0678fd2c6 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/1762652579.5483131", + "retrieved_timestamp": "1762652579.548314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5844193406827218 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3512662445055541 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3117083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18184840425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json new file mode 100644 index 000000000..b7fb140e2 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/1762652579.548037", + "retrieved_timestamp": "1762652579.548039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4998891829235318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3141475230443668 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37809374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12691156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json b/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json new file mode 100644 index 000000000..c72bc026b --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/1762652579.5498", + "retrieved_timestamp": "1762652579.5498009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.803263119633683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.512116784464076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41613541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38863031914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json new file mode 100644 index 000000000..53079ef63 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepMount00/Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3-8b-Ita/1762652579.551231", + "retrieved_timestamp": "1762652579.551231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Llama-3-8b-Ita", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepMount00/Llama-3-8b-Ita" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7530297388706411 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.493576505761469 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38522273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json new file mode 100644 index 000000000..baf7f4266 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-ITA/1762652579.5514839", + "retrieved_timestamp": "1762652579.5514848", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Llama-3.1-8b-ITA", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepMount00/Llama-3.1-8b-ITA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7916727616058724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5109356715302854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41359375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38763297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json new file mode 100644 index 000000000..e6bcbfd0b --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-Ita/1762652579.551703", + "retrieved_timestamp": "1762652579.5517042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Llama-3.1-8b-Ita", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepMount00/Llama-3.1-8b-Ita" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364843060856306 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5169995464792883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44871875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39602726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json new file mode 100644 index 000000000..22e69f9c5 --- /dev/null +++ b/data/hfopenllm_v2/meta/DeepMount00/Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-Distilled/1762652579.551904", + "retrieved_timestamp": "1762652579.551905", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepMount00/Llama-3.1-Distilled", + "developer": "meta", + "inference_platform": "unknown", + "id": "DeepMount00/Llama-3.1-Distilled" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7843787816327346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5100875314179011 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40581249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3781582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json b/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json new file mode 100644 index 000000000..5bc30fbc5 --- /dev/null +++ b/data/hfopenllm_v2/meta/DevQuasar/DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/1762652579.555449", + "retrieved_timestamp": "1762652579.5554502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38488432913558246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5117943836412089 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33081570996978854 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44357291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3614527925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json new file mode 100644 index 000000000..b4fcbe51a --- /dev/null +++ b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/1762652579.5956101", + "retrieved_timestamp": "1762652579.5956109", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", + "developer": "meta", + "inference_platform": "unknown", + "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4188807918545016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4074954889367559 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41700000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634640957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json new file mode 100644 index 000000000..b4fa4cb7e --- /dev/null +++ b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/1762652579.596117", + "retrieved_timestamp": "1762652579.596118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", + "developer": "meta", + "inference_platform": "unknown", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5082569803676467 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4100577461090639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42357291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json new file mode 100644 index 000000000..cf4b47418 --- /dev/null +++ b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B/1762652579.5958989", + "retrieved_timestamp": "1762652579.5958998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31953771548380516 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4151575806137866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21509308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json new file mode 100644 index 000000000..42ea4a1cc --- /dev/null +++ b/data/hfopenllm_v2/meta/Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/1762652579.5963311", + "retrieved_timestamp": "1762652579.596332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", + "developer": "meta", + "inference_platform": "unknown", + "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4689147018799009 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41602720836190127 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2595578457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json b/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json new file mode 100644 index 000000000..de79f2041 --- /dev/null +++ b/data/hfopenllm_v2/meta/EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/1762652579.596818", + "retrieved_timestamp": "1762652579.596819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4704384366813389 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41602720836190127 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2595578457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json b/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json new file mode 100644 index 000000000..5d8bb879c --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Alpaca-Llama3.1-8B/1762652579.5979578", + "retrieved_timestamp": "1762652579.5979588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Alpaca-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Alpaca-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15986914719610634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47552608539742874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3402604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3246343085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json new file mode 100644 index 000000000..ebfcd921b --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/1762652579.600618", + "retrieved_timestamp": "1762652579.600619", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274010735958367 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48648902139668476 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3619375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3543051861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json new file mode 100644 index 000000000..4b4755328 --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/1762652579.600828", + "retrieved_timestamp": "1762652579.600829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46731561146646455 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4932027479020209 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46236458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351894946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json new file mode 100644 index 000000000..9b3f9bc1c --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1762652579.603883", + "retrieved_timestamp": "1762652579.603883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3216111029845255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37162741490176326 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31136458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1402094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json new file mode 100644 index 000000000..3da88601a --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B/1762652579.603668", + "retrieved_timestamp": "1762652579.603669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4427363839058143 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36434977901496834 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32879166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11153590425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json new file mode 100644 index 000000000..e88f2d7be --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/1762652579.604102", + "retrieved_timestamp": "1762652579.604102", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3676234613048932 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33260007841271594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3419375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11153590425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json b/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json new file mode 100644 index 000000000..b2262271c --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Llama-3.2-3B-Agent007-Coder/1762652579.6043148", + "retrieved_timestamp": "1762652579.6043148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399562050913798 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303758760727905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36680208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28515625 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json b/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json new file mode 100644 index 000000000..e813e5e93 --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/1762652579.604741", + "retrieved_timestamp": "1762652579.6047418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274010735958367 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45185934849403964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31341422872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json new file mode 100644 index 000000000..e27c739a7 --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/1762652579.606164", + "retrieved_timestamp": "1762652579.606165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4553263119633683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4804219047211424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.393125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3597905585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json new file mode 100644 index 000000000..83a5b6b0a --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/1762652579.6059399", + "retrieved_timestamp": "1762652579.605941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4828532737580731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47357563863974517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31821875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33427526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json new file mode 100644 index 000000000..13a6d999b --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/1762652579.6100821", + "retrieved_timestamp": "1762652579.610083", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.498640274471735 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4977581192690881 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3405917553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json new file mode 100644 index 000000000..cb04f2f2c --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/1762652579.610341", + "retrieved_timestamp": "1762652579.610341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42117913802045237 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49561092312727917 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13595166163141995 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33834773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json new file mode 100644 index 000000000..459538cca --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/1762652579.6105568", + "retrieved_timestamp": "1762652579.610558", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880814017916905 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49508699339363266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42801041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3355219414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json new file mode 100644 index 000000000..c7b55a98b --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/1762652579.6107578", + "retrieved_timestamp": "1762652579.610759", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40843960690966635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4930009712421776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43721875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3402593085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json new file mode 100644 index 000000000..acbedd16d --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/1762652579.611186", + "retrieved_timestamp": "1762652579.611187", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5079079065767719 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4847020640542447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40630208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35305851063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json new file mode 100644 index 000000000..c27030972 --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/1762652579.611454", + "retrieved_timestamp": "1762652579.611454", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39522577871159636 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49553052334314723 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12462235649546828 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4048125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35929188829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json new file mode 100644 index 000000000..c04f4bc99 --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/1762652579.611669", + "retrieved_timestamp": "1762652579.61167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316382753316755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4827931104634334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4103020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523105053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json new file mode 100644 index 000000000..47f8ba0dc --- /dev/null +++ b/data/hfopenllm_v2/meta/EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/1762652579.6118872", + "retrieved_timestamp": "1762652579.6118872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", + "developer": "meta", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3596047376516532 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4897693552241443 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3957291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3550531914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json new file mode 100644 index 000000000..7b6c24f3e --- /dev/null +++ b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/1762652579.614388", + "retrieved_timestamp": "1762652579.614389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", + "developer": "meta", + "inference_platform": "unknown", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6163679038285084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5337975953250876 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40171874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375249335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json new file mode 100644 index 000000000..ae3bc41e0 --- /dev/null +++ b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.614203", + "retrieved_timestamp": "1762652579.614203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Herplete-LLM-Llama-3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6105976586568084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5347253355929804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3990520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375249335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json new file mode 100644 index 000000000..2482b4a7c --- /dev/null +++ b/data/hfopenllm_v2/meta/Etherll/Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.613958", + "retrieved_timestamp": "1762652579.6139588", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Herplete-LLM-Llama-3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Etherll/Herplete-LLM-Llama-3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46719149634082013 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5013428726325629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38599999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815492021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json b/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json new file mode 100644 index 000000000..b43027208 --- /dev/null +++ b/data/hfopenllm_v2/meta/Etherll/Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Etherll_Replete-LLM-V3-Llama-3.1-8b/1762652579.6150668", + "retrieved_timestamp": "1762652579.615068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Etherll/Replete-LLM-V3-Llama-3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262924595628488 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4543377420594779 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3516458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34699135638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json b/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json new file mode 100644 index 000000000..fe0407f5a --- /dev/null +++ b/data/hfopenllm_v2/meta/Eurdem/Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Eurdem_Defne-llama3.1-8B/1762652579.615498", + "retrieved_timestamp": "1762652579.615499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Eurdem/Defne-llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Eurdem/Defne-llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5036115285220991 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5320979090308238 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43309375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3865525265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json b/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json new file mode 100644 index 000000000..c77ab2d8b --- /dev/null +++ b/data/hfopenllm_v2/meta/GenVRadmin/llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/GenVRadmin_llama38bGenZ_Vikas-Merged/1762652579.627924", + "retrieved_timestamp": "1762652579.627925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "GenVRadmin/llama38bGenZ_Vikas-Merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "GenVRadmin/llama38bGenZ_Vikas-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30002947734234053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4535981003984562 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44016666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26221742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json b/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json new file mode 100644 index 000000000..13282d500 --- /dev/null +++ b/data/hfopenllm_v2/meta/Groq/Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Groq_Llama-3-Groq-8B-Tool-Use/1762652579.633301", + "retrieved_timestamp": "1762652579.633302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Groq/Llama-3-Groq-8B-Tool-Use", + "developer": "meta", + "inference_platform": "unknown", + "id": "Groq/Llama-3-Groq-8B-Tool-Use" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6098230472922956 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4863384977901497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33992686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json b/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json new file mode 100644 index 000000000..8c8ebceaa --- /dev/null +++ b/data/hfopenllm_v2/meta/Gryphe/Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.0-8b-Llama-3/1762652579.633556", + "retrieved_timestamp": "1762652579.633556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", + "developer": "meta", + "inference_platform": "unknown", + "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39325212657969744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4539075127777334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3832395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30668218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json new file mode 100644 index 000000000..b483da9b8 --- /dev/null +++ b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3-Aloe-8B-Alpha/1762652579.6361432", + "retrieved_timestamp": "1762652579.6361442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HPAI-BSC/Llama3-Aloe-8B-Alpha", + "developer": "meta", + "inference_platform": "unknown", + "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5081073773144147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48308532966126966 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3672708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3295378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json new file mode 100644 index 000000000..66a257c9a --- /dev/null +++ b/data/hfopenllm_v2/meta/HPAI-BSC/Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3.1-Aloe-Beta-8B/1762652579.636478", + "retrieved_timestamp": "1762652579.636513", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7253276860951166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092760762748857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3834583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35804521276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json b/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json new file mode 100644 index 000000000..07983eeee --- /dev/null +++ b/data/hfopenllm_v2/meta/Hastagaras/Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Hastagaras_Llama-3.1-Jamet-8B-MK.I/1762652579.637886", + "retrieved_timestamp": "1762652579.637887", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", + "developer": "meta", + "inference_platform": "unknown", + "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7338207068356406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048666433733161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3726041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3482380319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json b/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json new file mode 100644 index 000000000..c006aae5d --- /dev/null +++ b/data/hfopenllm_v2/meta/Hastagaras/Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Hastagaras_Zabuza-8B-Llama-3.1/1762652579.638141", + "retrieved_timestamp": "1762652579.6381419", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Hastagaras/Zabuza-8B-Llama-3.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Hastagaras/Zabuza-8B-Llama-3.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6265342624237025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4538915742546196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29230385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json b/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json new file mode 100644 index 000000000..358d6e0a5 --- /dev/null +++ b/data/hfopenllm_v2/meta/HiroseKoichi/Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HiroseKoichi_Llama-Salad-4x8B-V3/1762652579.640251", + "retrieved_timestamp": "1762652579.6402519", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HiroseKoichi/Llama-Salad-4x8B-V3", + "developer": "meta", + "inference_platform": "unknown", + "id": "HiroseKoichi/Llama-Salad-4x8B-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6653523761397536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244649789001753 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.351811835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json b/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json new file mode 100644 index 000000000..7e8fadcfa --- /dev/null +++ b/data/hfopenllm_v2/meta/HoangHa/Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HoangHa_Pensez-Llama3.1-8B/1762652579.640512", + "retrieved_timestamp": "1762652579.640512", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HoangHa/Pensez-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "HoangHa/Pensez-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3886809221753835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46691313514505667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3596979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31258311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json b/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json new file mode 100644 index 000000000..28a38220d --- /dev/null +++ b/data/hfopenllm_v2/meta/IDEA-CCNL/Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/IDEA-CCNL_Ziya-LLaMA-13B-v1/1762652579.645581", + "retrieved_timestamp": "1762652579.645581", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IDEA-CCNL/Ziya-LLaMA-13B-v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16968643200042555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28770292445409473 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37505208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11012300531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json b/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json new file mode 100644 index 000000000..b72842337 --- /dev/null +++ b/data/hfopenllm_v2/meta/Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/1762652579.6465652", + "retrieved_timestamp": "1762652579.6465652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20243398626754788 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43507435668237937 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4609375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21600731382978725 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json b/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json new file mode 100644 index 000000000..66c9c838c --- /dev/null +++ b/data/hfopenllm_v2/meta/IntervitensInc/internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/IntervitensInc_internlm2_5-20b-llamafied/1762652579.6480021", + "retrieved_timestamp": "1762652579.648003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "IntervitensInc/internlm2_5-20b-llamafied", + "developer": "meta", + "inference_platform": "unknown", + "id": "IntervitensInc/internlm2_5-20b-llamafied" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3409952260003457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7478466526577329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1714501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44754166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4050864361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 19.861 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json b/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json new file mode 100644 index 000000000..86e36ecea --- /dev/null +++ b/data/hfopenllm_v2/meta/JackFram/llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JackFram_llama-160m/1762652579.649858", + "retrieved_timestamp": "1762652579.649858", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JackFram/llama-160m", + "developer": "meta", + "inference_platform": "unknown", + "id": "JackFram/llama-160m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1791036671586945 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28880217539042424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3792083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.162 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json b/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json new file mode 100644 index 000000000..f4816cbdc --- /dev/null +++ b/data/hfopenllm_v2/meta/JackFram/llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/JackFram_llama-68m/1762652579.650121", + "retrieved_timestamp": "1762652579.650121", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "JackFram/llama-68m", + "developer": "meta", + "inference_platform": "unknown", + "id": "JackFram/llama-68m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17263416623448008 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29362986509336414 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3909895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11436170212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.068 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json b/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json new file mode 100644 index 000000000..c007855ad --- /dev/null +++ b/data/hfopenllm_v2/meta/Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/1762652579.694483", + "retrieved_timestamp": "1762652579.694484", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", + "developer": "meta", + "inference_platform": "unknown", + "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6185410266980501 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5177452540141246 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31441156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json b/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json new file mode 100644 index 000000000..c70d1b52f --- /dev/null +++ b/data/hfopenllm_v2/meta/Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/1762652579.6947358", + "retrieved_timestamp": "1762652579.694737", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", + "developer": "meta", + "inference_platform": "unknown", + "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8096328851890761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5147423127141911 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4109895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38804853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json b/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json new file mode 100644 index 000000000..699771a1d --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/1762652579.695199", + "retrieved_timestamp": "1762652579.6952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22269245601670234 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292556113105267 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33555208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12142619680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json new file mode 100644 index 000000000..9077feb28 --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-Cinder-Agent-v1/1762652579.695456", + "retrieved_timestamp": "1762652579.695457", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/TinyLlama-Cinder-Agent-v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26695612087040166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31160367351776513 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33945833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11610704787234043 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json new file mode 100644 index 000000000..4f9c664a5 --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-v1.1-Cinders-World/1762652579.6958752", + "retrieved_timestamp": "1762652579.6958761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/TinyLlama-v1.1-Cinders-World", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/TinyLlama-v1.1-Cinders-World" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24692260978647768 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29979653176003074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3356145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11984707446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json new file mode 100644 index 000000000..251cbf694 --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/1762652579.696125", + "retrieved_timestamp": "1762652579.696125", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00784363267242029 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31463497508928434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23406040268456377 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34990625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11319813829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json new file mode 100644 index 000000000..765068c86 --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/1762652579.696357", + "retrieved_timestamp": "1762652579.696357", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21257596510591897 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30843808427144626 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.334125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10862699468085106 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json new file mode 100644 index 000000000..d9debcce2 --- /dev/null +++ b/data/hfopenllm_v2/meta/Josephgflowers/Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-r1/1762652579.6965919", + "retrieved_timestamp": "1762652579.6965928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/Tinyllama-r1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Josephgflowers/Tinyllama-r1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2119265770378152 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3014631984266974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11344747340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json b/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json new file mode 100644 index 000000000..5709704cf --- /dev/null +++ b/data/hfopenllm_v2/meta/KingNish/Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-Llama-3b-v0.1/1762652579.69997", + "retrieved_timestamp": "1762652579.699971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "KingNish/Reasoning-Llama-3b-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "KingNish/Reasoning-Llama-3b-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6224628430342602 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43433592509582786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31676041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3029421542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json new file mode 100644 index 000000000..91395f060 --- /dev/null +++ b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/1762652579.7021902", + "retrieved_timestamp": "1762652579.702191", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371412297149342 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4986771544360115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40711458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.379155585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json new file mode 100644 index 000000000..c3ddbe96c --- /dev/null +++ b/data/hfopenllm_v2/meta/Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/1762652579.7024388", + "retrieved_timestamp": "1762652579.70244", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", + "developer": "meta", + "inference_platform": "unknown", + "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5275912356990563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4557141539616392 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37003125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3056848404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json new file mode 100644 index 000000000..b57bcb162 --- /dev/null +++ b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki100p/1762652579.704138", + "retrieved_timestamp": "1762652579.704139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LEESM/llama-2-7b-hf-lora-oki100p", + "developer": "meta", + "inference_platform": "unknown", + "id": "LEESM/llama-2-7b-hf-lora-oki100p" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25129434345314877 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34916752720369776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3687291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18558843085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json new file mode 100644 index 000000000..66aa649c0 --- /dev/null +++ b/data/hfopenllm_v2/meta/LEESM/llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki10p/1762652579.704393", + "retrieved_timestamp": "1762652579.704394", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LEESM/llama-2-7b-hf-lora-oki10p", + "developer": "meta", + "inference_platform": "unknown", + "id": "LEESM/llama-2-7b-hf-lora-oki10p" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22701432199896276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3530929513059229 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34752083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16788563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json b/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json new file mode 100644 index 000000000..8096d7f1c --- /dev/null +++ b/data/hfopenllm_v2/meta/LEESM/llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LEESM_llama-3-8b-bnb-4b-kowiki231101/1762652579.704602", + "retrieved_timestamp": "1762652579.704603", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LEESM/llama-3-8b-bnb-4b-kowiki231101", + "developer": "meta", + "inference_platform": "unknown", + "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16848739123303944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4130805653617178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3551458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24251994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json b/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json new file mode 100644 index 000000000..d5123df00 --- /dev/null +++ b/data/hfopenllm_v2/meta/LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/1762652579.7048151", + "retrieved_timestamp": "1762652579.704816", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", + "developer": "meta", + "inference_platform": "unknown", + "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21372513818889433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43430121169320707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38692708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3176529255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json new file mode 100644 index 000000000..4c5448de0 --- /dev/null +++ b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/1762652579.733024", + "retrieved_timestamp": "1762652579.733025", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6946280314011268 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48600920882996324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3316145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3505651595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json new file mode 100644 index 000000000..e92d76201 --- /dev/null +++ b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/1762652579.733407", + "retrieved_timestamp": "1762652579.7334101", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6762933460994606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4908161460506797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3356145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34956781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json new file mode 100644 index 000000000..8248f8565 --- /dev/null +++ b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/1762652579.7341938", + "retrieved_timestamp": "1762652579.734195", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32108693821283085 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47387586084568856 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40692708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33527260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json new file mode 100644 index 000000000..30f65fdb5 --- /dev/null +++ b/data/hfopenllm_v2/meta/LimYeri/CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/1762652579.7344582", + "retrieved_timestamp": "1762652579.734459", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", + "developer": "meta", + "inference_platform": "unknown", + "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6490157227268093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4694777854416285 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37523958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33402593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json new file mode 100644 index 000000000..279bac2e4 --- /dev/null +++ b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.0-Llama-3.1-8B/1762652579.734967", + "retrieved_timestamp": "1762652579.734968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/Hercules-6.0-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Locutusque/Hercules-6.0-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6630041622893922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48133037900119535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.362125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3614527925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json new file mode 100644 index 000000000..4643b9352 --- /dev/null +++ b/data/hfopenllm_v2/meta/Locutusque/Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.1-Llama-3.1-8B/1762652579.735234", + "retrieved_timestamp": "1762652579.735234", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/Hercules-6.1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Locutusque/Hercules-6.1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6006806384836678 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46562423765034017 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17598187311178248 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35533333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36685505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json b/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json new file mode 100644 index 000000000..168550448 --- /dev/null +++ b/data/hfopenllm_v2/meta/Locutusque/Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-NeuralHercules-5.0-8B/1762652579.735453", + "retrieved_timestamp": "1762652579.735453", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/Llama-3-NeuralHercules-5.0-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4489310584803876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3940474241916672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29330119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json b/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json new file mode 100644 index 000000000..6b67e734b --- /dev/null +++ b/data/hfopenllm_v2/meta/Locutusque/Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-Yggdrasil-2.0-8B/1762652579.7359009", + "retrieved_timestamp": "1762652579.735904", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/Llama-3-Yggdrasil-2.0-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5370583385417359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47724551424666856 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39765625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.316655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json b/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json new file mode 100644 index 000000000..b0a47abaa --- /dev/null +++ b/data/hfopenllm_v2/meta/Lyte/Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-3B-Overthinker/1762652579.741945", + "retrieved_timestamp": "1762652579.7419462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Lyte/Llama-3.2-3B-Overthinker", + "developer": "meta", + "inference_platform": "unknown", + "id": "Lyte/Llama-3.2-3B-Overthinker" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6407975283359264 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4320093097952517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34190625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29853723404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json b/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json new file mode 100644 index 000000000..b58c8b879 --- /dev/null +++ b/data/hfopenllm_v2/meta/MLP-KTLim/llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MLP-KTLim_llama-3-Korean-Bllossom-8B/1762652579.7427032", + "retrieved_timestamp": "1762652579.7427042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MLP-KTLim/llama-3-Korean-Bllossom-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5112800702136997 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49004556470187666 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json new file mode 100644 index 000000000..bb7702bf4 --- /dev/null +++ b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.744125", + "retrieved_timestamp": "1762652579.7441258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027192294223771 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47894081019705514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3086979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30011635638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json new file mode 100644 index 000000000..51865012b --- /dev/null +++ b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.743867", + "retrieved_timestamp": "1762652579.7438679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4118117705465941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4811441560714845 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3046979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3006150265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json new file mode 100644 index 000000000..c5d4b21f8 --- /dev/null +++ b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/1762652579.7443142", + "retrieved_timestamp": "1762652579.744315", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", + "developer": "meta", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44970566984490046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456960506522001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31341422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json new file mode 100644 index 000000000..1a25929c4 --- /dev/null +++ b/data/hfopenllm_v2/meta/Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/1762652579.744737", + "retrieved_timestamp": "1762652579.744738", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4457838535086903 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46223963164680143 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31406249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32621343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json b/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json new file mode 100644 index 000000000..c88f7e384 --- /dev/null +++ b/data/hfopenllm_v2/meta/MagusCorp/grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MagusCorp_grpo_lora_enem_llama3_7b/1762652579.745377", + "retrieved_timestamp": "1762652579.745378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MagusCorp/grpo_lora_enem_llama3_7b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MagusCorp/grpo_lora_enem_llama3_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4723622211288271 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48014581980384746 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35738031914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json new file mode 100644 index 000000000..0592c1377 --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-llama3.1-70b/1762652579.751613", + "retrieved_timestamp": "1762652579.7516139", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-llama3.1-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-llama3.1-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8434298771703524 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.644755327496552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43803125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5282579787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json new file mode 100644 index 000000000..effb498db --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3-70b/1762652579.753183", + "retrieved_timestamp": "1762652579.753183", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-llama3-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-llama3-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8208486814984242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6435431762417703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2394259818731118 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4445729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206948138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json new file mode 100644 index 000000000..1b7b941b9 --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3.1-70b/1762652579.753403", + "retrieved_timestamp": "1762652579.753404", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-llama3.1-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-llama3.1-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8592667455684251 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6792920009427085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43655589123867067 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45415625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414727393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json new file mode 100644 index 000000000..926d4f6d6 --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3-70b/1762652579.7547278", + "retrieved_timestamp": "1762652579.7547278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-llama3-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-llama3-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8010401290797307 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6399173489368603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42612500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204454787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json new file mode 100644 index 000000000..ff85a2394 --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3.1-70b/1762652579.755093", + "retrieved_timestamp": "1762652579.7550972", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-llama3.1-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-llama3.1-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8604657863358112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6871653740091753 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39274924471299094 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45682291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363198138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json new file mode 100644 index 000000000..3729d2e3d --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-llama3-70b/1762652579.7565", + "retrieved_timestamp": "1762652579.756501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.4-llama3-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.4-llama3-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5027371817887649 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6418191966839487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4287916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5203623670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json new file mode 100644 index 000000000..fc9429cd5 --- /dev/null +++ b/data/hfopenllm_v2/meta/MaziyarPanahi/calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-llamaloi-3b/1762652579.758682", + "retrieved_timestamp": "1762652579.758683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-3.1-llamaloi-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-3.1-llamaloi-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7375175645066203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4587340004998879 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35152083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3204787234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json b/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json new file mode 100644 index 000000000..c1d5e2a60 --- /dev/null +++ b/data/hfopenllm_v2/meta/MoonRide/Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MoonRide_Llama-3.2-3B-Khelavaster/1762652579.762122", + "retrieved_timestamp": "1762652579.762123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MoonRide/Llama-3.2-3B-Khelavaster", + "developer": "meta", + "inference_platform": "unknown", + "id": "MoonRide/Llama-3.2-3B-Khelavaster" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4924954675815725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45156712929620335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36990625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31216755319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json new file mode 100644 index 000000000..94de29f65 --- /dev/null +++ b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/1762652579.766795", + "retrieved_timestamp": "1762652579.766796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3280063564675062 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45284530156109354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3240525265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json new file mode 100644 index 000000000..50d46df7c --- /dev/null +++ b/data/hfopenllm_v2/meta/NAPS-ai/naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/1762652579.7671611", + "retrieved_timestamp": "1762652579.767162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", + "developer": "meta", + "inference_platform": "unknown", + "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1844993506119319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3040736853180832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34860416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10987367021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.761 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json b/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json new file mode 100644 index 000000000..e4c70bb91 --- /dev/null +++ b/data/hfopenllm_v2/meta/Naveenpoliasetty/llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Naveenpoliasetty_llama3-8B-V2/1762652579.769772", + "retrieved_timestamp": "1762652579.769773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Naveenpoliasetty/llama3-8B-V2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Naveenpoliasetty/llama3-8B-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4122616878770551 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188657580065063 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40813541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3737533244680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json b/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json new file mode 100644 index 000000000..29f181b94 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nekochu/Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-German-ORPO/1762652579.7705338", + "retrieved_timestamp": "1762652579.7705338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nekochu/Llama-3.1-8B-German-ORPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nekochu/Llama-3.1-8B-German-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610710692074806 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4982577044334462 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46475 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33934507978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json b/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json new file mode 100644 index 000000000..814929ba4 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/1762652579.772268", + "retrieved_timestamp": "1762652579.772269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5311883580012146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3240787338568713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32367708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1373005319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json new file mode 100644 index 000000000..6dc920011 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/1762652579.7727091", + "retrieved_timestamp": "1762652579.7727098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6809144181881852 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155095936229447 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3665833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json new file mode 100644 index 000000000..f96997680 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/1762652579.772983", + "retrieved_timestamp": "1762652579.772984", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7100903380807368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.51203649030939 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37576041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34408244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json new file mode 100644 index 000000000..ac3153304 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/1762652579.773223", + "retrieved_timestamp": "1762652579.7732239", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.759999024809727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.525696414662245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23187311178247735 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38521875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36884973404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json new file mode 100644 index 000000000..f752c7f70 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/1762652579.7734542", + "retrieved_timestamp": "1762652579.7734542", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7995662619627034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250767747736031 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4011875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790724734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json new file mode 100644 index 000000000..ffc315033 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/1762652579.7736902", + "retrieved_timestamp": "1762652579.773691", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7746368524404137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.531273698652086 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19939577039274925 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39458333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json new file mode 100644 index 000000000..e4eef1cff --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/1762652579.7739289", + "retrieved_timestamp": "1762652579.77393", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7764354135914928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294434267893284 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20770392749244712 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3045302013422819 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39058333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37217420212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json new file mode 100644 index 000000000..45be72b9e --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/1762652579.7741492", + "retrieved_timestamp": "1762652579.7741492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7977677008116243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252760762748857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1986404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40896874999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3738364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json new file mode 100644 index 000000000..4922080ec --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/1762652579.774375", + "retrieved_timestamp": "1762652579.774376", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8016895171478344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5261737638679802 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39706249999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37608045212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json new file mode 100644 index 000000000..a48b37fd8 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/1762652579.7746859", + "retrieved_timestamp": "1762652579.774687", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7941207108250552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.530825004382936 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22205438066465258 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3958541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37857380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json new file mode 100644 index 000000000..3ade95fff --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/1762652579.7749188", + "retrieved_timestamp": "1762652579.7749188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8053863748188141 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278362703806528 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971299093655589 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41566666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3854720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json new file mode 100644 index 000000000..497378b28 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/1762652579.775126", + "retrieved_timestamp": "1762652579.775127", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7533544329046928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5312389177563648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20166163141993956 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37470833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3732546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json new file mode 100644 index 000000000..a0a580be2 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/1762652579.7753332", + "retrieved_timestamp": "1762652579.775334", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7564019025075688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316448098766001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20921450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37200797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json new file mode 100644 index 000000000..83b5595ce --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/1762652579.775538", + "retrieved_timestamp": "1762652579.775538", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.508657030013697 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193615033347353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13444108761329304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39448958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3570478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json new file mode 100644 index 000000000..991b2e822 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/1762652579.775745", + "retrieved_timestamp": "1762652579.775746", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7889001183526376 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195180641442355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221476510067114 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4029583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3888796542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json new file mode 100644 index 000000000..2d6fbfe25 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/1762652579.775957", + "retrieved_timestamp": "1762652579.775958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7871514248859692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191641616026265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4110520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json new file mode 100644 index 000000000..99a52cb37 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/1762652579.7761788", + "retrieved_timestamp": "1762652579.7761788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5001141415887622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5170855986734039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40084374999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34266954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json new file mode 100644 index 000000000..e1c5bee6e --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/1762652579.776387", + "retrieved_timestamp": "1762652579.7763882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6647528557560606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5140787918844759 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3613125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3488198138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json new file mode 100644 index 000000000..c594c7360 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/1762652579.776601", + "retrieved_timestamp": "1762652579.776602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5061592131101034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4918197968512548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36965624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json new file mode 100644 index 000000000..e47707318 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Mediver_V1.01/1762652579.7768", + "retrieved_timestamp": "1762652579.776801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18847103463255274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44148325896745977 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38978124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2993683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json new file mode 100644 index 000000000..9cb73987d --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Medusa_v1.01/1762652579.777005", + "retrieved_timestamp": "1762652579.7770061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7685419132346618 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5017727187674992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40667708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3531416223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json new file mode 100644 index 000000000..726811829 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1762652579.777212", + "retrieved_timestamp": "1762652579.777212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6345529860769425 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5112504828088763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26057401812688824 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4188020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3645279255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json new file mode 100644 index 000000000..8d9d801cb --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/1762652579.777418", + "retrieved_timestamp": "1762652579.777418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8151283040111349 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241273021389002 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37892708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3735871010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json new file mode 100644 index 000000000..a7802c933 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/1762652579.777617", + "retrieved_timestamp": "1762652579.777618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7852531283660686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207086605445487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18504531722054382 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3948958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38522273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json new file mode 100644 index 000000000..7a04ae8b7 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/1762652579.7778199", + "retrieved_timestamp": "1762652579.7778208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8078343240379969 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5313965802672672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22734138972809667 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38146875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json new file mode 100644 index 000000000..05338fceb --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/1762652579.7780669", + "retrieved_timestamp": "1762652579.778068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2741004977903075 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3284363786988483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1377992021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json new file mode 100644 index 000000000..ae88e808e --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/1762652579.778271", + "retrieved_timestamp": "1762652579.778271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24312601674667658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3111956727868642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1116190159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json new file mode 100644 index 000000000..03ff977cc --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Dolto_0.1/1762652579.778476", + "retrieved_timestamp": "1762652579.778477", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Dolto_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Dolto_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5433782364127182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3350056502150862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13638630319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json new file mode 100644 index 000000000..ef4b1d611 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/1762652579.778893", + "retrieved_timestamp": "1762652579.7788942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24954564998648032 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3044651612138552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11519281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json new file mode 100644 index 000000000..5525bb322 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1/1762652579.77868", + "retrieved_timestamp": "1762652579.77868", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2552660274737696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3009715832098017 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33936458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11527593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json new file mode 100644 index 000000000..1bf49da83 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/1762652579.779097", + "retrieved_timestamp": "1762652579.779098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5366339091388627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3279521771600605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31307291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16747007978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json new file mode 100644 index 000000000..f8d893dcb --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OrcaSun_V1/1762652579.779477", + "retrieved_timestamp": "1762652579.779478", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_OrcaSun_V1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5948605256275571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.355031362479927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19040890957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json new file mode 100644 index 000000000..abc8104cd --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/1762652579.779787", + "retrieved_timestamp": "1762652579.779788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5542693386880144 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34277067367168224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15633311170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json new file mode 100644 index 000000000..9b49b0154 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_SunOrca_V1/1762652579.7800052", + "retrieved_timestamp": "1762652579.780006", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_SunOrca_V1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.542953807009845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34306447662530104 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18841422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json new file mode 100644 index 000000000..a917830e5 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Sydonia_0.1/1762652579.780214", + "retrieved_timestamp": "1762652579.780215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21967047434141412 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31210928710549807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22818791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818750000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12242353723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json new file mode 100644 index 000000000..0f593eea0 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Syneridol_0.2/1762652579.780447", + "retrieved_timestamp": "1762652579.780447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21574865800520399 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3138849872298115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33428125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12267287234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json new file mode 100644 index 000000000..dc7610149 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.1/1762652579.780673", + "retrieved_timestamp": "1762652579.780674", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17638089158987041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31619439082949846 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34609375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12308843085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json new file mode 100644 index 000000000..89d82609a --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.11/1762652579.780885", + "retrieved_timestamp": "1762652579.780886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.11", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28421698870109086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31019707628668325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35133333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json new file mode 100644 index 000000000..fdca1f99c --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v1/1762652579.781107", + "retrieved_timestamp": "1762652579.781108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_3b_Kermes_v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4851759996808468 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409910297279671 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40702083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2547373670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json new file mode 100644 index 000000000..c99930ca9 --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2.1/1762652579.781543", + "retrieved_timestamp": "1762652579.781544", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5583906257618674 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44638999626044323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3963541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26919880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json new file mode 100644 index 000000000..4f5f06a9d --- /dev/null +++ b/data/hfopenllm_v2/meta/Nexesenex/Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2/1762652579.781325", + "retrieved_timestamp": "1762652579.781326", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Nexesenex/Llama_3.2_3b_Kermes_v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Nexesenex/Llama_3.2_3b_Kermes_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5753766672429155 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44554539692939316 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37781249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2734375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json new file mode 100644 index 000000000..f43f58516 --- /dev/null +++ b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-1B-0929/1762652579.788707", + "retrieved_timestamp": "1762652579.7887082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NotASI/FineTome-Llama3.2-1B-0929", + "developer": "meta", + "inference_platform": "unknown", + "id": "NotASI/FineTome-Llama3.2-1B-0929" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39907223943580805 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3246274874705644 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3487604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1428690159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json new file mode 100644 index 000000000..f1b4f719f --- /dev/null +++ b/data/hfopenllm_v2/meta/NotASI/FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-3B-1002/1762652579.788946", + "retrieved_timestamp": "1762652579.7889469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NotASI/FineTome-Llama3.2-3B-1002", + "developer": "meta", + "inference_platform": "unknown", + "id": "NotASI/FineTome-Llama3.2-3B-1002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5474496558021605 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4319470614025341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3685104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24368351063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json new file mode 100644 index 000000000..82f2ac00a --- /dev/null +++ b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-1B-1007/1762652579.789186", + "retrieved_timestamp": "1762652579.789187", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", + "developer": "meta", + "inference_platform": "unknown", + "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39237777984636324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32405671121485663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34745833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1427027925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json new file mode 100644 index 000000000..ed8bc632f --- /dev/null +++ b/data/hfopenllm_v2/meta/NotASI/FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-3B-1007/1762652579.789401", + "retrieved_timestamp": "1762652579.789401", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", + "developer": "meta", + "inference_platform": "unknown", + "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5507719517546776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312372935321582 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3645416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2448470744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json new file mode 100644 index 000000000..b101723e2 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Llama-3-8B/1762652579.78989", + "retrieved_timestamp": "1762652579.789891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-2-Pro-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361839918084017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507112624310082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4262395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30518617021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json new file mode 100644 index 000000000..08f5298dd --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Theta-Llama-3-8B/1762652579.79036", + "retrieved_timestamp": "1762652579.79036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-2-Theta-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-2-Theta-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517883659800441 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5206672260911865 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3948958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33685172872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json new file mode 100644 index 000000000..4db7f02d4 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-70B/1762652579.7905731", + "retrieved_timestamp": "1762652579.7905731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-3-Llama-3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-3-Llama-3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7661438316998896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6755780641387483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4948958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47265625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json new file mode 100644 index 000000000..b449c8011 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-8B/1762652579.790786", + "retrieved_timestamp": "1762652579.790787", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-3-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-3-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6170172918966121 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5177452540141246 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4369375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json new file mode 100644 index 000000000..ffcd5453e --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.2-3B/1762652579.790994", + "retrieved_timestamp": "1762652579.790995", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-3-Llama-3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-3-Llama-3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3824862476008103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43519901506714875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40302083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25440492021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json b/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json new file mode 100644 index 000000000..a38c19f19 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-llama-2-7b/1762652579.792065", + "retrieved_timestamp": "1762652579.792066", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Nous-Hermes-llama-2-7b", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Nous-Hermes-llama-2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17290788441335658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3823937686034717 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42571875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19398271276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json new file mode 100644 index 000000000..879f34c22 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-13b-128k/1762652579.792277", + "retrieved_timestamp": "1762652579.792278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Llama-2-13b-128k", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Llama-2-13b-128k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16546430138698653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3826816443733663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23204787234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json new file mode 100644 index 000000000..84cd94a88 --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-128k/1762652579.792481", + "retrieved_timestamp": "1762652579.7924821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Llama-2-7b-128k", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Llama-2-7b-128k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14847825990593846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32480295375597734 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39669791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1791057180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json new file mode 100644 index 000000000..a591d4b0e --- /dev/null +++ b/data/hfopenllm_v2/meta/NousResearch/Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-64k/1762652579.7927492", + "retrieved_timestamp": "1762652579.792753", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Llama-2-7b-64k", + "developer": "meta", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Llama-2-7b-64k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1699856381068897 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3326277865253592 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.393875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17985372340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json b/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json new file mode 100644 index 000000000..0a200b564 --- /dev/null +++ b/data/hfopenllm_v2/meta/OEvortex/Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OEvortex_Emotional-llama-8B/1762652579.797152", + "retrieved_timestamp": "1762652579.797153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OEvortex/Emotional-llama-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "OEvortex/Emotional-llama-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3516369898535885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4838573702054177 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.365875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347406914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json new file mode 100644 index 000000000..2051ecccd --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/1762652579.8002949", + "retrieved_timestamp": "1762652579.8002958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7010476646409305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6507443429944494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45796875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4832114361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json new file mode 100644 index 000000000..7d1b2abba --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/1762652579.800596", + "retrieved_timestamp": "1762652579.800596", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5569666263292509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47875007373484046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3987708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2954621010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json new file mode 100644 index 000000000..f522aa053 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/1762652579.800807", + "retrieved_timestamp": "1762652579.800808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6191904147661538 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4856219845879779 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.377875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3298703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json new file mode 100644 index 000000000..ae4212b02 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/1762652579.801551", + "retrieved_timestamp": "1762652579.801553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7332710541363582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6698491606025763 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950151057401813 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46295833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5304188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json new file mode 100644 index 000000000..6627c2b43 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/1762652579.801888", + "retrieved_timestamp": "1762652579.801889", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6657269378582162 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5006515954024578 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40810416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3310339095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json new file mode 100644 index 000000000..dfca23efa --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/1762652579.8021362", + "retrieved_timestamp": "1762652579.802138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5997065563815123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065914870348772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40146875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3277094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json new file mode 100644 index 000000000..1d9948a35 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/1762652579.802413", + "retrieved_timestamp": "1762652579.8024142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3590052172679601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3266563226631131 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1840093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json new file mode 100644 index 000000000..a241049d4 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/1762652579.802651", + "retrieved_timestamp": "1762652579.802652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4319450169993395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072660342069299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2479222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json new file mode 100644 index 000000000..7555a2a1a --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/1762652579.802965", + "retrieved_timestamp": "1762652579.8029802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.812080834408259 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6858038620320306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43456375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4869270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327460106382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json new file mode 100644 index 000000000..ebce075ee --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/1762652579.804163", + "retrieved_timestamp": "1762652579.8041642", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.630880508162786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.601319898776811 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2537764350453172 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42404166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4673371010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json new file mode 100644 index 000000000..e825c433e --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/1762652579.8044102", + "retrieved_timestamp": "1762652579.804411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6131453432448126 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6080855261046028 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4345833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4794714095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json new file mode 100644 index 000000000..e759d0b43 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/1762652579.804652", + "retrieved_timestamp": "1762652579.8046532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5672582082208539 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5509381466888461 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43632291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.394780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.615 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json b/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json new file mode 100644 index 000000000..ffc17ba53 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenLeecher/llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenLeecher_llama3-8b-lima/1762652579.807648", + "retrieved_timestamp": "1762652579.8076491", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenLeecher/llama3-8b-lima", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenLeecher/llama3-8b-lima" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706587410293574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4295828632822993 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37127083333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26263297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json b/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json new file mode 100644 index 000000000..26c120844 --- /dev/null +++ b/data/hfopenllm_v2/meta/OpenScholar/Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/OpenScholar_Llama-3.1_OpenScholar-8B/1762652579.807913", + "retrieved_timestamp": "1762652579.807913", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "OpenScholar/Llama-3.1_OpenScholar-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "OpenScholar/Llama-3.1_OpenScholar-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6064010159709571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207740834450674 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16540785498489427 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4275104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json new file mode 100644 index 000000000..56e8706c0 --- /dev/null +++ b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/1762652579.808416", + "retrieved_timestamp": "1762652579.808417", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7791581891603169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084008018783934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971299093655589 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3780751329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json new file mode 100644 index 000000000..8b863f38b --- /dev/null +++ b/data/hfopenllm_v2/meta/Orenguteng/Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/1762652579.8081658", + "retrieved_timestamp": "1762652579.808167", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", + "developer": "meta", + "inference_platform": "unknown", + "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7776843220432896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057261652642643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15709969788519637 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37898936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json new file mode 100644 index 000000000..f1fb1518f --- /dev/null +++ b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/1762652579.8100638", + "retrieved_timestamp": "1762652579.8100648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825303527972447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5073267838961463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3869895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767453457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json new file mode 100644 index 000000000..2f3263f10 --- /dev/null +++ b/data/hfopenllm_v2/meta/PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/1762652579.810312", + "retrieved_timestamp": "1762652579.810313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825303527972447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5073267838961463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3869895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767453457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.015 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json b/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json new file mode 100644 index 000000000..15b30bc1a --- /dev/null +++ b/data/hfopenllm_v2/meta/PJMixers/LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PJMixers_LLaMa-3-CursedStock-v2.0-8B/1762652579.809348", + "retrieved_timestamp": "1762652579.809348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6330791189599152 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527115950402997 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38562500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3556349734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json b/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json new file mode 100644 index 000000000..bbda12b27 --- /dev/null +++ b/data/hfopenllm_v2/meta/RLHFlow/ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/RLHFlow_ArmoRM-Llama3-8B-v0.1/1762652579.8493571", + "retrieved_timestamp": "1762652579.8493571", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "RLHFlow/ArmoRM-Llama3-8B-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "RLHFlow/ArmoRM-Llama3-8B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18967007539993883 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2876467446788138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3948020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10779587765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForRewardModelWithGating", + "params_billions": 7.511 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json b/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json new file mode 100644 index 000000000..955eff5ae --- /dev/null +++ b/data/hfopenllm_v2/meta/Replete-AI/Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Llama3-8B/1762652579.851821", + "retrieved_timestamp": "1762652579.851821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-Coder-Llama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-Coder-Llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4729362535849324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271277102526684 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26090604026845643 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39530208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13306183510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json b/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json new file mode 100644 index 000000000..c901ad9ac --- /dev/null +++ b/data/hfopenllm_v2/meta/Replete-AI/Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/1762652579.8529909", + "retrieved_timestamp": "1762652579.852992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5514966954347797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5339203611594218 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1404833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37533244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json b/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json new file mode 100644 index 000000000..32c501d9e --- /dev/null +++ b/data/hfopenllm_v2/meta/SaisExperiments/RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SaisExperiments_RightSheep-Llama3.2-3B/1762652579.8563251", + "retrieved_timestamp": "1762652579.8563259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SaisExperiments/RightSheep-Llama3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "SaisExperiments/RightSheep-Llama3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4156338515139829 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42407794300783824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08081570996978851 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25398936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json b/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json new file mode 100644 index 000000000..57fc4ff51 --- /dev/null +++ b/data/hfopenllm_v2/meta/Sakalti/Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Llama3.2-3B-Uranus-1/1762652579.8570151", + "retrieved_timestamp": "1762652579.857016", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Llama3.2-3B-Uranus-1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Sakalti/Llama3.2-3B-Uranus-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5335365718515761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44368258173181263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14954682779456194 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3668645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3094248670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json new file mode 100644 index 000000000..82bbf0d86 --- /dev/null +++ b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/1762652579.878995", + "retrieved_timestamp": "1762652579.878996", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7847034756667863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5138053850165866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36943151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json new file mode 100644 index 000000000..b7e428e1d --- /dev/null +++ b/data/hfopenllm_v2/meta/SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/1762652579.879248", + "retrieved_timestamp": "1762652579.879248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7456858912130924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5142440064892148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40128125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35846077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json b/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json new file mode 100644 index 000000000..3f4319f54 --- /dev/null +++ b/data/hfopenllm_v2/meta/Sicarius-Prototyping/Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Brainy_LLAMA/1762652579.880492", + "retrieved_timestamp": "1762652579.8804932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sicarius-Prototyping/Brainy_LLAMA", + "developer": "meta", + "inference_platform": "unknown", + "id": "Sicarius-Prototyping/Brainy_LLAMA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204224790223274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5117131754488634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4143333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3848902925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json b/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json new file mode 100644 index 000000000..e0829b492 --- /dev/null +++ b/data/hfopenllm_v2/meta/SicariusSicariiStuff/Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_LLAMA_3B/1762652579.882116", + "retrieved_timestamp": "1762652579.882117", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Impish_LLAMA_3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Impish_LLAMA_3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46299485365496884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40905101627873225 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3672708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2941323138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json b/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json new file mode 100644 index 000000000..804d05879 --- /dev/null +++ b/data/hfopenllm_v2/meta/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/1762652579.883067", + "retrieved_timestamp": "1762652579.883067", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", + "developer": "meta", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713203189758729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4717234028484832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41194791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464926861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json new file mode 100644 index 000000000..11b66595e --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/1762652579.8857", + "retrieved_timestamp": "1762652579.8857012", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058345190760515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088388495224864 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3997916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3777426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json new file mode 100644 index 000000000..69795ab1e --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/1762652579.88546", + "retrieved_timestamp": "1762652579.885461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058345190760515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088388495224864 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15483383685800603 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3997916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3777426861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json new file mode 100644 index 000000000..9ad8e513d --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/1762652579.8859022", + "retrieved_timestamp": "1762652579.8859022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3247084402718121 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3166586087861201 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12790890957446807 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json new file mode 100644 index 000000000..2bdafc763 --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/1762652579.8861618", + "retrieved_timestamp": "1762652579.886163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359920566319587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060156188911545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3471458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19456449468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json new file mode 100644 index 000000000..c11bd39fd --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/1762652579.886383", + "retrieved_timestamp": "1762652579.886384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4359920566319587 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060156188911545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3471458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19456449468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json new file mode 100644 index 000000000..5fbe8a6c5 --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/1762652579.8865862", + "retrieved_timestamp": "1762652579.886587", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42467652495378927 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33968360414253995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34584375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19456449468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json new file mode 100644 index 000000000..61c97e832 --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/1762652579.886793", + "retrieved_timestamp": "1762652579.886794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5331121424487028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399628268031015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30044880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json new file mode 100644 index 000000000..180571bde --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/1762652579.887009", + "retrieved_timestamp": "1762652579.88701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5331121424487028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399628268031015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30044880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json new file mode 100644 index 000000000..643f57ba6 --- /dev/null +++ b/data/hfopenllm_v2/meta/SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/1762652579.887351", + "retrieved_timestamp": "1762652579.8873532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", + "developer": "meta", + "inference_platform": "unknown", + "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5331121424487028 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399628268031015 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30044880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json b/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json new file mode 100644 index 000000000..9d6f5a34d --- /dev/null +++ b/data/hfopenllm_v2/meta/Skywork/Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Skywork_Skywork-o1-Open-Llama-3.1-8B/1762652579.8887959", + "retrieved_timestamp": "1762652579.888797", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Skywork/Skywork-o1-Open-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3518364605912313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45159089701897237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31564583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20304188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json b/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json new file mode 100644 index 000000000..bfbe54d7b --- /dev/null +++ b/data/hfopenllm_v2/meta/Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/1762652579.889379", + "retrieved_timestamp": "1762652579.88938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25466650709007654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32093808427144627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38894791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11851728723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json b/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json new file mode 100644 index 000000000..7d8b38ab1 --- /dev/null +++ b/data/hfopenllm_v2/meta/T145/Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Zeus/1762652579.900112", + "retrieved_timestamp": "1762652579.9001129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "T145/Llama-3.1-8B-Zeus", + "developer": "meta", + "inference_platform": "unknown", + "id": "T145/Llama-3.1-8B-Zeus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35176110497923285 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3671175348446849 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33158333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1332280585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json b/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json new file mode 100644 index 000000000..9d02dc74a --- /dev/null +++ b/data/hfopenllm_v2/meta/Tarek07/Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tarek07_Progenitor-V1.1-LLaMa-70B/1762652579.911703", + "retrieved_timestamp": "1762652579.911703", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tarek07/Progenitor-V1.1-LLaMa-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Tarek07/Progenitor-V1.1-LLaMa-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6906064796960952 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6971116049173388 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45805369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47356250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465425531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json b/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json new file mode 100644 index 000000000..dff9e9746 --- /dev/null +++ b/data/hfopenllm_v2/meta/Tarek07/Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Tarek07_Thalassic-Alpha-LLaMa-70B/1762652579.9119601", + "retrieved_timestamp": "1762652579.911961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Tarek07/Thalassic-Alpha-LLaMa-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Tarek07/Thalassic-Alpha-LLaMa-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7003484088884161 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6940408286616311 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4437919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4801979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.543467420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json b/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json new file mode 100644 index 000000000..c5d0c8a85 --- /dev/null +++ b/data/hfopenllm_v2/meta/TencentARC/LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B/1762652579.912878", + "retrieved_timestamp": "1762652579.912879", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TencentARC/LLaMA-Pro-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "TencentARC/LLaMA-Pro-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2277135777514772 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3484197711435169 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40181249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18110039893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.357 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json b/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json new file mode 100644 index 000000000..3d450c225 --- /dev/null +++ b/data/hfopenllm_v2/meta/TheDrummer/Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TheDrummer_Llama-3SOME-8B-v2/1762652579.914594", + "retrieved_timestamp": "1762652579.9145951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TheDrummer/Llama-3SOME-8B-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "TheDrummer/Llama-3SOME-8B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508049752434651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5203347869042534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3832708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37533244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json new file mode 100644 index 000000000..1a62dbaa1 --- /dev/null +++ b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/1762652579.9195771", + "retrieved_timestamp": "1762652579.919578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", + "developer": "meta", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22766371006706648 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3071188438267271 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33803125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11203457446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json new file mode 100644 index 000000000..3e5c1c9b5 --- /dev/null +++ b/data/hfopenllm_v2/meta/TinyLlama/TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama_v1.1/1762652579.919856", + "retrieved_timestamp": "1762652579.9198568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TinyLlama/TinyLlama_v1.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "TinyLlama/TinyLlama_v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20006139266036338 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30237018045076064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36996874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10488696808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json new file mode 100644 index 000000000..e1e284a20 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1/1762652579.9221509", + "retrieved_timestamp": "1762652579.922152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3229353670483207 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5117012556460311 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2930513595166163 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4038541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31100398936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json new file mode 100644 index 000000000..631abc521 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/1762652579.922394", + "retrieved_timestamp": "1762652579.922395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13641360479084386 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292845246551473 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36209375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11037234042553191 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json b/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json new file mode 100644 index 000000000..254c72676 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Llama-8B-Harmony/1762652579.9232068", + "retrieved_timestamp": "1762652579.9232068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DS-R1-Llama-8B-Harmony", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/DS-R1-Llama-8B-Harmony" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35663262366077564 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41536451555729687 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4282477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3761979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27435172872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json b/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json new file mode 100644 index 000000000..b32707060 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Llama-Lit-8B/1762652579.923411", + "retrieved_timestamp": "1762652579.923412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/DSR1-Distill-Llama-Lit-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/DSR1-Distill-Llama-Lit-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18852090231696345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4284056327107781 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35196374622356497 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27975398936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json b/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json new file mode 100644 index 000000000..05d7a8a0a --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Dolphin3-Llama3.2-Smart/1762652579.924712", + "retrieved_timestamp": "1762652579.924713", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Dolphin3-Llama3.2-Smart", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Dolphin3-Llama3.2-Smart" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.413660199382084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397507554563096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21949800531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json new file mode 100644 index 000000000..6d019ea07 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT-Summary/1762652579.925437", + "retrieved_timestamp": "1762652579.925438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Hermes-Llama-3.2-CoT-Summary", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Hermes-Llama-3.2-CoT-Summary" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48302836473889277 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42003008354054533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29014295212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json new file mode 100644 index 000000000..4dfeb4ab6 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT/1762652579.925184", + "retrieved_timestamp": "1762652579.925184", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Hermes-Llama-3.2-CoT", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Hermes-Llama-3.2-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4177571066991139 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4615751505493966 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2947140957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json new file mode 100644 index 000000000..570d0d896 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-Allades-Lit-8b/1762652579.927552", + "retrieved_timestamp": "1762652579.927553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Llama3.1-Allades-Lit-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Llama3.1-Allades-Lit-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24612361866514182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41832977787362163 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37083333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2724401595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json new file mode 100644 index 000000000..86232d6b3 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-cc-Lit-8b/1762652579.927792", + "retrieved_timestamp": "1762652579.9277928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Llama3.1-cc-Lit-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Llama3.1-cc-Lit-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2993047336622384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3847994561866892 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30044880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json b/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json new file mode 100644 index 000000000..6069bd8a3 --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Porpoise-R1-Llama3.2-3b/1762652579.931781", + "retrieved_timestamp": "1762652579.931781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Porpoise-R1-Llama3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/Porpoise-R1-Llama3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4352174452674459 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38236758004585686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.357625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21168550531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json b/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json new file mode 100644 index 000000000..3c87d20ca --- /dev/null +++ b/data/hfopenllm_v2/meta/Triangle104/RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_RomboHermes3-R1-Llama3.2-3b/1762652579.9345112", + "retrieved_timestamp": "1762652579.9345121", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/RomboHermes3-R1-Llama3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.300728733094855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42639466274987187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2957114361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json b/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json new file mode 100644 index 000000000..64645ec98 --- /dev/null +++ b/data/hfopenllm_v2/meta/UKzExecution/LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UKzExecution_LlamaExecutor-8B-3.0.5/1762652579.938387", + "retrieved_timestamp": "1762652579.938387", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UKzExecution/LlamaExecutor-8B-3.0.5", + "developer": "meta", + "inference_platform": "unknown", + "id": "UKzExecution/LlamaExecutor-8B-3.0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.740290207759855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5006000507021341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3625332446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json new file mode 100644 index 000000000..4d2ca2fca --- /dev/null +++ b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B-r-v-0.1/1762652579.944067", + "retrieved_timestamp": "1762652579.9440682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VIRNECT/llama-3-Korean-8B-r-v-0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49157125316382755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48061568139086264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36748958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3259640957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json new file mode 100644 index 000000000..876e4d2d1 --- /dev/null +++ b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943881", + "retrieved_timestamp": "1762652579.943882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VIRNECT/llama-3-Korean-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "VIRNECT/llama-3-Korean-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021376614050719 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.491837579362695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3647916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3536402925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json new file mode 100644 index 000000000..c05d3ebaf --- /dev/null +++ b/data/hfopenllm_v2/meta/VIRNECT/llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943627", + "retrieved_timestamp": "1762652579.943627", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VIRNECT/llama-3-Korean-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "VIRNECT/llama-3-Korean-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5058345190760515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49082453083378397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36615624999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3538896276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json new file mode 100644 index 000000000..99634b645 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-Fireplace/1762652579.944278", + "retrieved_timestamp": "1762652579.944279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3-70B-Fireplace", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3-70B-Fireplace" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7773596280092377 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.648899361888402 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4448541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4892785904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json new file mode 100644 index 000000000..2b0ac5f88 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-ShiningValiant2/1762652579.9445372", + "retrieved_timestamp": "1762652579.944538", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3-70B-ShiningValiant2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3-70B-ShiningValiant2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6121712611426571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6338341405069171 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20770392749244712 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48977726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json new file mode 100644 index 000000000..f262f4dae --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-70B-ShiningValiant2/1762652579.94475", + "retrieved_timestamp": "1762652579.944751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-70B-ShiningValiant2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5355346037402979 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6738408402945882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29154078549848944 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5172872340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json new file mode 100644 index 000000000..0c9b1d182 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.945206", + "retrieved_timestamp": "1762652579.945206", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Cobalt", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Cobalt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7168346653545925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4910700749859321 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3512395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36627327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json new file mode 100644 index 000000000..246746d99 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.9449751", + "retrieved_timestamp": "1762652579.9449759", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Cobalt", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Cobalt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496134700372789 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4946769968149292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3959479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644448138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json new file mode 100644 index 000000000..0624195d5 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Enigma/1762652579.945396", + "retrieved_timestamp": "1762652579.945397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Enigma", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Enigma" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26805542626896633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44776000880153927 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4196041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34092420212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json new file mode 100644 index 000000000..2d711f22d --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Esper2/1762652579.945612", + "retrieved_timestamp": "1762652579.9456131", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Esper2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Esper2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2567398945907968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4469866863000255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3560729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29039228723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json new file mode 100644 index 000000000..46e306758 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.945827", + "retrieved_timestamp": "1762652579.945827", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Fireplace2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Fireplace2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5483240025354947 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4609817052543379 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34330208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24069148936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json new file mode 100644 index 000000000..5afb1ab3c --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.946038", + "retrieved_timestamp": "1762652579.946039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-Fireplace2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-Fireplace2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5328118281714739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4613311485871581 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33666666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24235372340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json new file mode 100644 index 000000000..ddc992cbe --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946223", + "retrieved_timestamp": "1762652579.9462242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6495653754260917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.477390600131639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39086458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33818151595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json new file mode 100644 index 000000000..e6078bf3b --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946434", + "retrieved_timestamp": "1762652579.946435", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26780608784691284 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4429290017852748 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39591666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.292719414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json new file mode 100644 index 000000000..1b20e7100 --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Enigma/1762652579.94662", + "retrieved_timestamp": "1762652579.946621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.2-3B-Enigma", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.2-3B-Enigma" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2786218345102107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3722590772046992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2427692819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json new file mode 100644 index 000000000..e0e4b0b6a --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Esper2/1762652579.947128", + "retrieved_timestamp": "1762652579.9471302", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.2-3B-Esper2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.2-3B-Esper2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27497484452364174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38082611390366106 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22573138297872342 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json new file mode 100644 index 000000000..b9cdfa25c --- /dev/null +++ b/data/hfopenllm_v2/meta/ValiantLabs/Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-ShiningValiant2/1762652579.947389", + "retrieved_timestamp": "1762652579.9473898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ValiantLabs/Llama3.2-3B-ShiningValiant2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625101397624968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42259325337870185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38664583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28291223404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json new file mode 100644 index 000000000..5c46e2738 --- /dev/null +++ b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-Llama3-8B/1762652579.9489238", + "retrieved_timestamp": "1762652579.948925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Einstein-v6.1-Llama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Weyaxi/Einstein-v6.1-Llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4568245588372186 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5008295581095018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42128125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3130817819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json new file mode 100644 index 000000000..8ea9c3de6 --- /dev/null +++ b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/1762652579.9492018", + "retrieved_timestamp": "1762652579.949203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39270247388041507 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5043837450549643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43324999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30925864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json new file mode 100644 index 000000000..42d78e7ab --- /dev/null +++ b/data/hfopenllm_v2/meta/Weyaxi/Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v8-Llama3.2-1B/1762652579.9499211", + "retrieved_timestamp": "1762652579.949922", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Weyaxi/Einstein-v8-Llama3.2-1B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Weyaxi/Einstein-v8-Llama3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18622255615101263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30184334823943154 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11610704787234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json new file mode 100644 index 000000000..7cf568095 --- /dev/null +++ b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/1762652579.952687", + "retrieved_timestamp": "1762652579.9526882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", + "developer": "meta", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317878783849076 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527816493941946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33164583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1745345744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json new file mode 100644 index 000000000..c0602b9d5 --- /dev/null +++ b/data/hfopenllm_v2/meta/Xiaojian9992024/Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/1762652579.952322", + "retrieved_timestamp": "1762652579.952322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", + "developer": "meta", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5575916346405316 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35437497890840614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31297916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17627992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json b/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json new file mode 100644 index 000000000..d53baa335 --- /dev/null +++ b/data/hfopenllm_v2/meta/Xkev/Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xkev_Llama-3.2V-11B-cot/1762652579.9552681", + "retrieved_timestamp": "1762652579.955269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xkev/Llama-3.2V-11B-cot", + "developer": "meta", + "inference_platform": "unknown", + "id": "Xkev/Llama-3.2V-11B-cot" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41580894249480266 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.495871783411897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555891238670695 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4158541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35871010638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MllamaForConditionalGeneration", + "params_billions": 10.67 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json new file mode 100644 index 000000000..f2b46a34f --- /dev/null +++ b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-IgneousIguana-8B/1762652579.965119", + "retrieved_timestamp": "1762652579.965119", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yuma42/Llama3.1-IgneousIguana-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Yuma42/Llama3.1-IgneousIguana-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8133297428600558 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5190512670457804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42026041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39735704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json new file mode 100644 index 000000000..716f2e2d2 --- /dev/null +++ b/data/hfopenllm_v2/meta/Yuma42/Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-SuperHawk-8B/1762652579.965369", + "retrieved_timestamp": "1762652579.9653702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Yuma42/Llama3.1-SuperHawk-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "Yuma42/Llama3.1-SuperHawk-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7986420475449585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5199931545260023 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348942598187311 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40835416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39453125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json new file mode 100644 index 000000000..019a8a7c8 --- /dev/null +++ b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/1762652579.966579", + "retrieved_timestamp": "1762652579.96658", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", + "developer": "meta", + "inference_platform": "unknown", + "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.63008151704145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5163423288466883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3503989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json new file mode 100644 index 000000000..a54390a0b --- /dev/null +++ b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/1762652579.96684", + "retrieved_timestamp": "1762652579.966841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", + "developer": "meta", + "inference_platform": "unknown", + "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49734149833552754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5154785280029148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39470833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.363031914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json new file mode 100644 index 000000000..317cd27c4 --- /dev/null +++ b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/1762652579.967058", + "retrieved_timestamp": "1762652579.967059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", + "developer": "meta", + "inference_platform": "unknown", + "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7402403400754443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5438928349489152 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38739583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3621176861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json new file mode 100644 index 000000000..36d81d1fb --- /dev/null +++ b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1762652579.967272", + "retrieved_timestamp": "1762652579.967272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", + "developer": "meta", + "inference_platform": "unknown", + "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7338705745200512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244464882599044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17447129909365558 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4065833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37450132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json new file mode 100644 index 000000000..25686b3dc --- /dev/null +++ b/data/hfopenllm_v2/meta/ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/1762652579.96749", + "retrieved_timestamp": "1762652579.9674911", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", + "developer": "meta", + "inference_platform": "unknown", + "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4164583305629064 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5078595074869328 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25302114803625375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39706249999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json b/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json new file mode 100644 index 000000000..c0f59e191 --- /dev/null +++ b/data/hfopenllm_v2/meta/aaditya/Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aaditya_Llama3-OpenBioLLM-70B/1762652579.969287", + "retrieved_timestamp": "1762652579.9692879", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aaditya/Llama3-OpenBioLLM-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "aaditya/Llama3-OpenBioLLM-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7596743307756753 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6398872375485518 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971299093655589 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44171875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4867021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json b/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json new file mode 100644 index 000000000..f2c0d715b --- /dev/null +++ b/data/hfopenllm_v2/meta/abacusai/Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abacusai_Llama-3-Smaug-8B/1762652579.9700851", + "retrieved_timestamp": "1762652579.9700859", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abacusai/Llama-3-Smaug-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "abacusai/Llama-3-Smaug-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48667535472546175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4930712769667174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36224999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3184840425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json new file mode 100644 index 000000000..6d480b76d --- /dev/null +++ b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v1/1762652579.973002", + "retrieved_timestamp": "1762652579.973003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-llama3-70b-orpo-v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "abhishek/autotrain-llama3-70b-orpo-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4233023932055834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5997985900252331 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35790625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11220079787234043 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json new file mode 100644 index 000000000..6a53a9d9a --- /dev/null +++ b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v2/1762652579.9732742", + "retrieved_timestamp": "1762652579.973275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-llama3-70b-orpo-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "abhishek/autotrain-llama3-70b-orpo-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406055931594835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5899473641612185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41133333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48179853723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json new file mode 100644 index 000000000..d8212bad3 --- /dev/null +++ b/data/hfopenllm_v2/meta/abhishek/autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-orpo-v2/1762652579.9735", + "retrieved_timestamp": "1762652579.973501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abhishek/autotrain-llama3-orpo-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "abhishek/autotrain-llama3-orpo-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371656094717572 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31593828880846425 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3792395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22182513297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json new file mode 100644 index 000000000..bda25520a --- /dev/null +++ b/data/hfopenllm_v2/meta/agentlans/Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-8B-drill/1762652579.976306", + "retrieved_timestamp": "1762652579.976307", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-8B-drill", + "developer": "meta", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-8B-drill" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.765169749597734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5015680021795333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1714501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36723958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37757646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json new file mode 100644 index 000000000..81e8ae7fb --- /dev/null +++ b/data/hfopenllm_v2/meta/agentlans/Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish/1762652579.976594", + "retrieved_timestamp": "1762652579.976595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-Daredevilish", + "developer": "meta", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-Daredevilish" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6291573026237051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5012506630648397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40909375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3696808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json new file mode 100644 index 000000000..5ad561941 --- /dev/null +++ b/data/hfopenllm_v2/meta/agentlans/Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-LexiHermes-SuperStorm/1762652579.97705", + "retrieved_timestamp": "1762652579.9770508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-LexiHermes-SuperStorm", + "developer": "meta", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-LexiHermes-SuperStorm" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7834545672149895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5266460888159817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3962604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3843916223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json new file mode 100644 index 000000000..1fa2d64c7 --- /dev/null +++ b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/1762652579.977621", + "retrieved_timestamp": "1762652579.977621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", + "developer": "meta", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.718732961874493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215513828266275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18051359516616314 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40264583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3631150265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json new file mode 100644 index 000000000..92c9c7173 --- /dev/null +++ b/data/hfopenllm_v2/meta/agentlans/Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse/1762652579.977348", + "retrieved_timestamp": "1762652579.97735", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "agentlans/Llama3.1-SuperDeepFuse", + "developer": "meta", + "inference_platform": "unknown", + "id": "agentlans/Llama3.1-SuperDeepFuse" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7761605872418517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048544889908054 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.369875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3774933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json b/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json new file mode 100644 index 000000000..e3f29fc07 --- /dev/null +++ b/data/hfopenllm_v2/meta/ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/1762652579.97824", + "retrieved_timestamp": "1762652579.978241", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17480728910402177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2883257760266153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3803229166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11286569148936171 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.195 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json new file mode 100644 index 000000000..f6f4d3c8f --- /dev/null +++ b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-First/1762652579.979876", + "retrieved_timestamp": "1762652579.979876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akhadangi/Llama3.2.1B.0.01-First", + "developer": "meta", + "inference_platform": "unknown", + "id": "akhadangi/Llama3.2.1B.0.01-First" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08135857303066973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31891926453372005 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3193958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1196808510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json new file mode 100644 index 000000000..320fe1190 --- /dev/null +++ b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-Last/1762652579.980133", + "retrieved_timestamp": "1762652579.9801338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akhadangi/Llama3.2.1B.0.01-Last", + "developer": "meta", + "inference_platform": "unknown", + "id": "akhadangi/Llama3.2.1B.0.01-Last" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09165015492227291 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3159283874883156 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3206354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12267287234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json new file mode 100644 index 000000000..5c40571a3 --- /dev/null +++ b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-First/1762652579.9803479", + "retrieved_timestamp": "1762652579.9803488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akhadangi/Llama3.2.1B.0.1-First", + "developer": "meta", + "inference_platform": "unknown", + "id": "akhadangi/Llama3.2.1B.0.1-First" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10009330797838623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119615016336897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.330125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11693816489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json new file mode 100644 index 000000000..294727f6e --- /dev/null +++ b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-Last/1762652579.980555", + "retrieved_timestamp": "1762652579.980556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akhadangi/Llama3.2.1B.0.1-Last", + "developer": "meta", + "inference_platform": "unknown", + "id": "akhadangi/Llama3.2.1B.0.1-Last" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09497245087479 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3163776768490709 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11776928191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json new file mode 100644 index 000000000..daec7b160 --- /dev/null +++ b/data/hfopenllm_v2/meta/akhadangi/Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.BaseFiT/1762652579.980761", + "retrieved_timestamp": "1762652579.980762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akhadangi/Llama3.2.1B.BaseFiT", + "developer": "meta", + "inference_platform": "unknown", + "id": "akhadangi/Llama3.2.1B.BaseFiT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08827799128534511 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31745151457535453 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3220625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1171875 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json new file mode 100644 index 000000000..776bb8c9b --- /dev/null +++ b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.981211", + "retrieved_timestamp": "1762652579.981212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akjindal53244/Llama-3.1-Storm-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "akjindal53244/Llama-3.1-Storm-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8050616807847621 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188671226840744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17220543806646524 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3803191489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json new file mode 100644 index 000000000..dc0c8dceb --- /dev/null +++ b/data/hfopenllm_v2/meta/akjindal53244/Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.980961", + "retrieved_timestamp": "1762652579.980962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "akjindal53244/Llama-3.1-Storm-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "akjindal53244/Llama-3.1-Storm-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.803263119633683 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5196330402870707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1623867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json b/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json new file mode 100644 index 000000000..be6619f00 --- /dev/null +++ b/data/hfopenllm_v2/meta/alcholjung/llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/alcholjung_llama3_medical_tuned/1762652579.9813929", + "retrieved_timestamp": "1762652579.9813938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "alcholjung/llama3_medical_tuned", + "developer": "meta", + "inference_platform": "unknown", + "id": "alcholjung/llama3_medical_tuned" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010566408241244343 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4512943191660926 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46602083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29463098404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json new file mode 100644 index 000000000..057045720 --- /dev/null +++ b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981659", + "retrieved_timestamp": "1762652579.981659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8291167435737177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6163626496199947 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4501510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4948333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46451130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json new file mode 100644 index 000000000..4d5e56626 --- /dev/null +++ b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981919", + "retrieved_timestamp": "1762652579.981919", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8379344583482937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6156847169556112 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49880208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4655917553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json new file mode 100644 index 000000000..53dfb524a --- /dev/null +++ b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-RM/1762652579.9831831", + "retrieved_timestamp": "1762652579.9831831", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-8B-RM", + "developer": "meta", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-8B-RM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16701352411601217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2950041147470504 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10821143617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForSequenceClassification", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json new file mode 100644 index 000000000..52d7186f2 --- /dev/null +++ b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.9825459", + "retrieved_timestamp": "1762652579.982547", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8266687943545348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4049833102731906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2826628989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json new file mode 100644 index 000000000..00d3f6bd1 --- /dev/null +++ b/data/hfopenllm_v2/meta/allenai/Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.982752", + "retrieved_timestamp": "1762652579.982752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allenai/Llama-3.1-Tulu-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allenai/Llama-3.1-Tulu-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8254697535871487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40608256120952024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41746875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2820811170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json b/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json new file mode 100644 index 000000000..54aa86661 --- /dev/null +++ b/data/hfopenllm_v2/meta/allknowingroger/Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Llama3.1-60B/1762652579.989347", + "retrieved_timestamp": "1762652579.9893482", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Llama3.1-60B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allknowingroger/Llama3.1-60B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18145188100905596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32417552719382076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3595833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3310339095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 61.997 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json b/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json new file mode 100644 index 000000000..bf67cf0a4 --- /dev/null +++ b/data/hfopenllm_v2/meta/allknowingroger/Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Yillama-40B/1762652580.004728", + "retrieved_timestamp": "1762652580.004729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Yillama-40B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allknowingroger/Yillama-40B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16968643200042555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40628855371888356 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3500625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1981382978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json b/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json new file mode 100644 index 000000000..6d5e63687 --- /dev/null +++ b/data/hfopenllm_v2/meta/allknowingroger/llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_llama3-Jallabi-40B-s/1762652580.006197", + "retrieved_timestamp": "1762652580.006198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/llama3-Jallabi-40B-s", + "developer": "meta", + "inference_platform": "unknown", + "id": "allknowingroger/llama3-Jallabi-40B-s" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19206815693471102 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32522424198526295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37495833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10879321808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 18.769 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json b/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json new file mode 100644 index 000000000..de53ead45 --- /dev/null +++ b/data/hfopenllm_v2/meta/allknowingroger/llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_llama3AnFeng-40B/1762652580.006448", + "retrieved_timestamp": "1762652580.006449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/llama3AnFeng-40B", + "developer": "meta", + "inference_platform": "unknown", + "id": "allknowingroger/llama3AnFeng-40B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17420776872032873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794080447660335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39399999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1979720744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 39.971 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json b/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json new file mode 100644 index 000000000..8dbaed181 --- /dev/null +++ b/data/hfopenllm_v2/meta/aloobun/Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aloobun_Meta-Llama-3-7B-28Layers/1762652580.0090299", + "retrieved_timestamp": "1762652580.0090308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aloobun/Meta-Llama-3-7B-28Layers", + "developer": "meta", + "inference_platform": "unknown", + "id": "aloobun/Meta-Llama-3-7B-28Layers" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19636453498938372 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4437497014253391 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35892708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3159906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.158 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json new file mode 100644 index 000000000..9f786d394 --- /dev/null +++ b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010782", + "retrieved_timestamp": "1762652580.010783", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "amd/AMD-Llama-135m", + "developer": "meta", + "inference_platform": "unknown", + "id": "amd/AMD-Llama-135m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184319826948054 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29694449748780255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38457291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11685505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.134 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json new file mode 100644 index 000000000..d8c8cb4d5 --- /dev/null +++ b/data/hfopenllm_v2/meta/amd/AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010537", + "retrieved_timestamp": "1762652580.010538", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "amd/AMD-Llama-135m", + "developer": "meta", + "inference_platform": "unknown", + "id": "amd/AMD-Llama-135m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18422452426229072 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2973931917569524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37796874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11685505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json b/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json new file mode 100644 index 000000000..9b73f51d6 --- /dev/null +++ b/data/hfopenllm_v2/meta/arcee-ai/Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-3.1-SuperNova-Lite/1762652580.016114", + "retrieved_timestamp": "1762652580.016115", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Llama-3.1-SuperNova-Lite", + "developer": "meta", + "inference_platform": "unknown", + "id": "arcee-ai/Llama-3.1-SuperNova-Lite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8017393848322452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5151992115104819 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41632291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3877160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json b/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json new file mode 100644 index 000000000..a3e7b60c5 --- /dev/null +++ b/data/hfopenllm_v2/meta/arcee-ai/Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-Spark/1762652580.0163891", + "retrieved_timestamp": "1762652580.0163898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "arcee-ai/Llama-Spark", + "developer": "meta", + "inference_platform": "unknown", + "id": "arcee-ai/Llama-Spark" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7910732412221794 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5053504145749979 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13897280966767372 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3720910904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json b/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json new file mode 100644 index 000000000..1ca168b87 --- /dev/null +++ b/data/hfopenllm_v2/meta/argilla-warehouse/Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/1762652580.018188", + "retrieved_timestamp": "1762652580.018189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", + "developer": "meta", + "inference_platform": "unknown", + "id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5756514935925566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46196134634468616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31441156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json b/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json new file mode 100644 index 000000000..36448adb2 --- /dev/null +++ b/data/hfopenllm_v2/meta/asharsha30/LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/1762652580.01895", + "retrieved_timestamp": "1762652580.018951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", + "developer": "meta", + "inference_platform": "unknown", + "id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34639090945358314 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4668707690948544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.281000664893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json b/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json new file mode 100644 index 000000000..0fc3bba3d --- /dev/null +++ b/data/hfopenllm_v2/meta/bfuzzy1/acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m1a-llama/1762652580.0322502", + "retrieved_timestamp": "1762652580.032251", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bfuzzy1/acheron-m1a-llama", + "developer": "meta", + "inference_platform": "unknown", + "id": "bfuzzy1/acheron-m1a-llama" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11245827737070972 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29560475093811295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36330208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11461103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json b/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json new file mode 100644 index 000000000..b33a164c5 --- /dev/null +++ b/data/hfopenllm_v2/meta/bosonai/Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bosonai_Higgs-Llama-3-70B/1762652580.035682", + "retrieved_timestamp": "1762652580.035682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bosonai/Higgs-Llama-3-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "bosonai/Higgs-Llama-3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5560678998390935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.625765879603832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25226586102719034 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44708333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49019281914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json b/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json new file mode 100644 index 000000000..d9dcd487a --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Best-Mix-Llama-3.1-8B/1762652580.0419252", + "retrieved_timestamp": "1762652580.041926", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Best-Mix-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Best-Mix-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20670598456539757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.343178100574048 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2928541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15649933510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json b/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json new file mode 100644 index 000000000..24dcd21db --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_HyperLlama-3.1-8B/1762652580.045207", + "retrieved_timestamp": "1762652580.045208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/HyperLlama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/HyperLlama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7883005979689446 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5103385292046213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38292708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783244680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json new file mode 100644 index 000000000..9b839b0d8 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-Mix/1762652580.045413", + "retrieved_timestamp": "1762652580.045414", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4924954675815725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5755964197928182 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316979166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695146276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json new file mode 100644 index 000000000..967572d7e --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-v3/1762652580.045624", + "retrieved_timestamp": "1762652580.045625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.1-8B-TitanFusion-v3", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4809549772381725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5262113071794826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1419939577039275 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4302083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38056848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json new file mode 100644 index 000000000..658608ba9 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-All-Mix/1762652580.045842", + "retrieved_timestamp": "1762652580.045843", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-All-Mix", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-All-Mix" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7226049105262924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45083384652782293 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15030211480362538 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32869791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3159906914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json new file mode 100644 index 000000000..af8e84fdc --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Bespoke-Thought/1762652580.046056", + "retrieved_timestamp": "1762652580.046057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Bespoke-Thought", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4112621178473118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45217398665008424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33025 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31100398936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json new file mode 100644 index 000000000..a27907dd9 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Booval/1762652580.046278", + "retrieved_timestamp": "1762652580.046279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Booval", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Booval" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6669259786256023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45143904014934083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3394270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30576795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json new file mode 100644 index 000000000..1eb4d6112 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046704", + "retrieved_timestamp": "1762652580.046706", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Deep-Test", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Deep-Test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46516797652451053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4530851376077318 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3152426861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json new file mode 100644 index 000000000..615ccc7bf --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046481", + "retrieved_timestamp": "1762652580.046481", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Deep-Test", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Deep-Test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17753006467284582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29502574011260374 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10488696808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.803 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json new file mode 100644 index 000000000..a78299774 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Della/1762652580.0469692", + "retrieved_timestamp": "1762652580.0469701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Della", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Della" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35608297096149333 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36834936417932634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39015625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21284906914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json new file mode 100644 index 000000000..5e62ed1b1 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Long-Think/1762652580.047193", + "retrieved_timestamp": "1762652580.047194", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Long-Think", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Long-Think" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5473499204333391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610394542442049 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14577039274924472 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33955208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30477061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json new file mode 100644 index 000000000..ca3f6de20 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Mix-Skill/1762652580.047411", + "retrieved_timestamp": "1762652580.047412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-Mix-Skill", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-Mix-Skill" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6404229666174639 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45818358891543803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33961458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3120844414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json new file mode 100644 index 000000000..c048aa9e9 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlus/1762652580.047628", + "retrieved_timestamp": "1762652580.047629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-ProdigyPlus", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-ProdigyPlus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40152018865499095 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4392279045834126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35800000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28174867021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json new file mode 100644 index 000000000..bf7ddc34a --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/1762652580.047838", + "retrieved_timestamp": "1762652580.047839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1645157072124186 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3689926047041594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.354125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15001662234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json new file mode 100644 index 000000000..293f2d3d3 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RP-DeepThink/1762652580.048058", + "retrieved_timestamp": "1762652580.048059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-RP-DeepThink", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-RP-DeepThink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7143867161354096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45625632795830356 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1608761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32421875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json new file mode 100644 index 000000000..5ea29ac49 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RRStock/1762652580.048298", + "retrieved_timestamp": "1762652580.048298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-RRStock", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-RRStock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6657269378582162 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45676937648721455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3314270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32355385638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json new file mode 100644 index 000000000..dac210480 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ToxicKod/1762652580.0485172", + "retrieved_timestamp": "1762652580.048518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3B-ToxicKod", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3B-ToxicKod" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6319299458769398 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4525429005077621 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34745833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28798204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json new file mode 100644 index 000000000..3883788e6 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/1762652580.048726", + "retrieved_timestamp": "1762652580.048727", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.683362367407368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46497242330684924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24018126888217523 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3953645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31058843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json b/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json new file mode 100644 index 000000000..389ee28c3 --- /dev/null +++ b/data/hfopenllm_v2/meta/bunnycore/Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Smol-Llama-3.2-3B/1762652580.061756", + "retrieved_timestamp": "1762652580.0617611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Smol-Llama-3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "bunnycore/Smol-Llama-3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6678501930433471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.453881406940321 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34600000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3228058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json b/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json new file mode 100644 index 000000000..42afdf28a --- /dev/null +++ b/data/hfopenllm_v2/meta/chargoddard/prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/chargoddard_prometheus-2-llama-3-8b/1762652580.100514", + "retrieved_timestamp": "1762652580.100516", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chargoddard/prometheus-2-llama-3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "chargoddard/prometheus-2-llama-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5288900118352637 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4931144581470071 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33958333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30867686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json b/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json new file mode 100644 index 000000000..5eb950946 --- /dev/null +++ b/data/hfopenllm_v2/meta/cloudyu/Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3-70Bx2-MOE/1762652580.10177", + "retrieved_timestamp": "1762652580.101771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Llama-3-70Bx2-MOE", + "developer": "meta", + "inference_platform": "unknown", + "id": "cloudyu/Llama-3-70Bx2-MOE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5482486469234964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6636234572270707 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48118750000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5142121010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 126.926 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json b/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json new file mode 100644 index 000000000..7fb2937d2 --- /dev/null +++ b/data/hfopenllm_v2/meta/cloudyu/Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3.2-3Bx4/1762652580.102047", + "retrieved_timestamp": "1762652580.102048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/Llama-3.2-3Bx4", + "developer": "meta", + "inference_platform": "unknown", + "id": "cloudyu/Llama-3.2-3Bx4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5068584688626179 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43321946547659324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3495625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29853723404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 9.949 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json b/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json new file mode 100644 index 000000000..625463392 --- /dev/null +++ b/data/hfopenllm_v2/meta/cloudyu/S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cloudyu_S1-Llama-3.2-3Bx4-MoE/1762652580.103262", + "retrieved_timestamp": "1762652580.103263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cloudyu/S1-Llama-3.2-3Bx4-MoE", + "developer": "meta", + "inference_platform": "unknown", + "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.530214275899059 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43578925882973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.345625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30435505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 9.555 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json new file mode 100644 index 000000000..54a260ca3 --- /dev/null +++ b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/1762652580.109549", + "retrieved_timestamp": "1762652580.1095521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", + "developer": "meta", + "inference_platform": "unknown", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1326668794354535 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800219303191354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43321875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2590591755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json new file mode 100644 index 000000000..cfcc73890 --- /dev/null +++ b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/1762652580.110752", + "retrieved_timestamp": "1762652580.110753", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", + "developer": "meta", + "inference_platform": "unknown", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13184240038652995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37889016032903705 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43055208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2562333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json new file mode 100644 index 000000000..3c97761d8 --- /dev/null +++ b/data/hfopenllm_v2/meta/cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/1762652580.1111748", + "retrieved_timestamp": "1762652580.111176", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", + "developer": "meta", + "inference_platform": "unknown", + "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12085156274241235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3780811415223316 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43185416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25556848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json new file mode 100644 index 000000000..e02490fa2 --- /dev/null +++ b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.1-8B/1762652580.111501", + "retrieved_timestamp": "1762652580.1115022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7621222799948582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4916366353921198 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36534375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2992021276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json new file mode 100644 index 000000000..8b5b5f818 --- /dev/null +++ b/data/hfopenllm_v2/meta/cognitivecomputations/Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.2-1B/1762652580.112042", + "retrieved_timestamp": "1762652580.112046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", + "developer": "meta", + "inference_platform": "unknown", + "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5427787160290252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31222474255909144 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32488541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13754986702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json new file mode 100644 index 000000000..3e51895eb --- /dev/null +++ b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9-llama3-8b/1762652580.113044", + "retrieved_timestamp": "1762652580.113045", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9-llama3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9-llama3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38503393218881454 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49499220166609187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.277094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json new file mode 100644 index 000000000..e04191792 --- /dev/null +++ b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-llama-3-70b/1762652580.113282", + "retrieved_timestamp": "1762652580.1132832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3760167466765959 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5204919312821467 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49756249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41298204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json new file mode 100644 index 000000000..82af7f87c --- /dev/null +++ b/data/hfopenllm_v2/meta/cognitivecomputations/dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/1762652580.1160939", + "retrieved_timestamp": "1762652580.116095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27572396796056686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35236263850832567 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3236145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12367021276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json b/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json new file mode 100644 index 000000000..ed8403626 --- /dev/null +++ b/data/hfopenllm_v2/meta/collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/1762652580.116315", + "retrieved_timestamp": "1762652580.116315", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.380887157187374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46480279544898967 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3434270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3480718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json new file mode 100644 index 000000000..c25c2a8fd --- /dev/null +++ b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117069", + "retrieved_timestamp": "1762652580.1170702", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cpayne1303/llama-43m-beta", + "developer": "meta", + "inference_platform": "unknown", + "id": "cpayne1303/llama-43m-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19156837191983936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29767781029884355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11319813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.043 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json new file mode 100644 index 000000000..453bd1b9e --- /dev/null +++ b/data/hfopenllm_v2/meta/cpayne1303/llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117342", + "retrieved_timestamp": "1762652580.117342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cpayne1303/llama-43m-beta", + "developer": "meta", + "inference_platform": "unknown", + "id": "cpayne1303/llama-43m-beta" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19489066787235645 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29646319842669744 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3885416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.043 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json b/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json new file mode 100644 index 000000000..3560f7fb3 --- /dev/null +++ b/data/hfopenllm_v2/meta/cstr/llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cstr_llama3.1-8b-spaetzle-v90/1762652580.117986", + "retrieved_timestamp": "1762652580.1179872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cstr/llama3.1-8b-spaetzle-v90", + "developer": "meta", + "inference_platform": "unknown", + "id": "cstr/llama3.1-8b-spaetzle-v90" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7356192679867197 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302860633332208 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14954682779456194 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41343749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37308843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json new file mode 100644 index 000000000..a8036f1b4 --- /dev/null +++ b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/1762652580.121449", + "retrieved_timestamp": "1762652580.12145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43359397509718656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5634962649702303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3074018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43421875000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4748171542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json new file mode 100644 index 000000000..bde88da94 --- /dev/null +++ b/data/hfopenllm_v2/meta/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/1762652580.121731", + "retrieved_timestamp": "1762652580.121734", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37823973723054827 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.323935108539057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21978851963746224 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32497916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20894281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json new file mode 100644 index 000000000..a083083c0 --- /dev/null +++ b/data/hfopenllm_v2/meta/dfurman/Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-70B-Orpo-v0.1/1762652580.124833", + "retrieved_timestamp": "1762652580.124834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dfurman/Llama-3-70B-Orpo-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "dfurman/Llama-3-70B-Orpo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20490742341431845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46552376347015506 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4534375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38929521276595747 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json new file mode 100644 index 000000000..775e8daac --- /dev/null +++ b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.1253839", + "retrieved_timestamp": "1762652580.125385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dfurman/Llama-3-8B-Orpo-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "dfurman/Llama-3-8B-Orpo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3000039894147528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3852967582460245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.041540785498489434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.357875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22805851063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json new file mode 100644 index 000000000..80865c01b --- /dev/null +++ b/data/hfopenllm_v2/meta/dfurman/Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.125153", + "retrieved_timestamp": "1762652580.125154", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dfurman/Llama-3-8B-Orpo-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "dfurman/Llama-3-8B-Orpo-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28351773294857646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842420919898036 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22980385638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json b/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json new file mode 100644 index 000000000..d49429938 --- /dev/null +++ b/data/hfopenllm_v2/meta/dnhkng/RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama3.1-Large/1762652580.133179", + "retrieved_timestamp": "1762652580.1331809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "dnhkng/RYS-Llama3.1-Large", + "developer": "meta", + "inference_platform": "unknown", + "id": "dnhkng/RYS-Llama3.1-Large" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8492001223420524 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6899112229777242 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3504531722054381 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4553958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5248503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 81.677 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json new file mode 100644 index 000000000..351c3657d --- /dev/null +++ b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-1B-en-vi/1762652580.1364539", + "retrieved_timestamp": "1762652580.1364548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "duyhv1411/Llama-3.2-1B-en-vi", + "developer": "meta", + "inference_platform": "unknown", + "id": "duyhv1411/Llama-3.2-1B-en-vi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4788317220530415 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.329090872737918 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13414228723404256 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json new file mode 100644 index 000000000..9aa94fc19 --- /dev/null +++ b/data/hfopenllm_v2/meta/duyhv1411/Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-3B-en-vi/1762652580.136725", + "retrieved_timestamp": "1762652580.136726", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "duyhv1411/Llama-3.2-3B-en-vi", + "developer": "meta", + "inference_platform": "unknown", + "id": "duyhv1411/Llama-3.2-3B-en-vi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4852014876084345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271639320986486 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3210104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13588763297872342 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json b/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json new file mode 100644 index 000000000..cfdb226c2 --- /dev/null +++ b/data/hfopenllm_v2/meta/ehristoforu/HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_HappyLlama1/1762652580.139553", + "retrieved_timestamp": "1762652580.139554", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/HappyLlama1", + "developer": "meta", + "inference_platform": "unknown", + "id": "ehristoforu/HappyLlama1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7362686560548235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49957323097428485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14274924471299094 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42868749999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35455452127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json b/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json new file mode 100644 index 000000000..8b36e00a6 --- /dev/null +++ b/data/hfopenllm_v2/meta/ehristoforu/mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-it/1762652580.143829", + "retrieved_timestamp": "1762652580.14383", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/mllama-3.1-8b-it", + "developer": "meta", + "inference_platform": "unknown", + "id": "ehristoforu/mllama-3.1-8b-it" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38788193105404767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4868027039491969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37990936555891236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3348645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26221742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json b/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json new file mode 100644 index 000000000..501d35cc2 --- /dev/null +++ b/data/hfopenllm_v2/meta/flammenai/Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_Llama3.1-Flammades-70B/1762652580.154665", + "retrieved_timestamp": "1762652580.154666", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/Llama3.1-Flammades-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "flammenai/Llama3.1-Flammades-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7058438277104748 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6659721866694542 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20921450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48705208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47523271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json b/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json new file mode 100644 index 000000000..29fdd2374 --- /dev/null +++ b/data/hfopenllm_v2/meta/flammenai/Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-llama3-8B/1762652580.154922", + "retrieved_timestamp": "1762652580.154923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/Mahou-1.2a-llama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "flammenai/Mahou-1.2a-llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50925655039739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5093660540433169 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38466666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38173204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json b/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json new file mode 100644 index 000000000..17bbddf96 --- /dev/null +++ b/data/hfopenllm_v2/meta/flammenai/Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-llama3.1-70B/1762652580.155493", + "retrieved_timestamp": "1762652580.155494", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/Mahou-1.5-llama3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "flammenai/Mahou-1.5-llama3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7146615424850509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6650860641288713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20996978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4950208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47490026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json b/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json new file mode 100644 index 000000000..395468ad4 --- /dev/null +++ b/data/hfopenllm_v2/meta/fluently-lm/Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B/1762652580.156513", + "retrieved_timestamp": "1762652580.156514", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fluently-lm/Llama-TI-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "fluently-lm/Llama-TI-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28803906966847964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520085504155627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4102708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.343999335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json b/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json new file mode 100644 index 000000000..fd86cdf17 --- /dev/null +++ b/data/hfopenllm_v2/meta/fulim/FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fulim_FineLlama-3.1-8B/1762652580.162704", + "retrieved_timestamp": "1762652580.162705", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fulim/FineLlama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "fulim/FineLlama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14388267574480157 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456920741562608 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38673958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31673869680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json b/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json new file mode 100644 index 000000000..4ca54b912 --- /dev/null +++ b/data/hfopenllm_v2/meta/gbueno86/Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gbueno86_Brinebreath-Llama-3.1-70B/1762652580.1638331", + "retrieved_timestamp": "1762652580.163834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gbueno86/Brinebreath-Llama-3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "gbueno86/Brinebreath-Llama-3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5532952565858589 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6880562247706813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.297583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45406250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5196143617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json b/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json new file mode 100644 index 000000000..c420871c8 --- /dev/null +++ b/data/hfopenllm_v2/meta/gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/1762652580.1641119", + "retrieved_timestamp": "1762652580.1641128", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", + "developer": "meta", + "inference_platform": "unknown", + "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8071849359698933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6674314931312052 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2938066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43682291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074800531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json b/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json new file mode 100644 index 000000000..3c839cc27 --- /dev/null +++ b/data/hfopenllm_v2/meta/glaiveai/Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/glaiveai_Reflection-Llama-3.1-70B/1762652580.164674", + "retrieved_timestamp": "1762652580.164675", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "glaiveai/Reflection-Llama-3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "glaiveai/Reflection-Llama-3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5990571683134085 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5681010035620444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2756797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43803125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6341422872340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 69.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json b/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json new file mode 100644 index 000000000..a46035689 --- /dev/null +++ b/data/hfopenllm_v2/meta/gmonsoon/SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Llama-11B-Test/1762652580.16498", + "retrieved_timestamp": "1762652580.164981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gmonsoon/SahabatAI-Llama-11B-Test", + "developer": "meta", + "inference_platform": "unknown", + "id": "gmonsoon/SahabatAI-Llama-11B-Test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33757319467900726 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727584153058988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40013541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3182347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.52 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json b/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json new file mode 100644 index 000000000..b20ef41e2 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/1762652580.181649", + "retrieved_timestamp": "1762652580.18165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4797060687863757 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5269400362212973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22205438066465258 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44078124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3956948138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json b/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json new file mode 100644 index 000000000..e06429419 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/1762652580.182574", + "retrieved_timestamp": "1762652580.182574", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3961499931293413 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48863582396592203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38385416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30950797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json b/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json new file mode 100644 index 000000000..e95c1b10c --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/1762652580.183682", + "retrieved_timestamp": "1762652580.1836832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4544519652300341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5013477378974034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40909375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38090093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json b/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json new file mode 100644 index 000000000..844877ee3 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/1762652580.183897", + "retrieved_timestamp": "1762652580.183897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43659157701565177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287189378780882 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30060422960725075 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3998541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3683510638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json b/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json new file mode 100644 index 000000000..cc44bc412 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/1762652580.186095", + "retrieved_timestamp": "1762652580.1860962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219462138237654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5222077363554879 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45268749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39910239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json new file mode 100644 index 000000000..fe1db9e0c --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v1-8B/1762652580.186311", + "retrieved_timestamp": "1762652580.186312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/llama-3-Nephilim-v1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/llama-3-Nephilim-v1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4277239945566652 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5131817939007638 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37957114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json new file mode 100644 index 000000000..276f17cd0 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2-8B/1762652580.186511", + "retrieved_timestamp": "1762652580.1865118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/llama-3-Nephilim-v2-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/llama-3-Nephilim-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39222817679313116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048214936442625 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3895 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3641123670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json new file mode 100644 index 000000000..6c2a4f6a1 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2.1-8B/1762652580.186715", + "retrieved_timestamp": "1762652580.186715", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/llama-3-Nephilim-v2.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/llama-3-Nephilim-v2.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38950540122430705 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5095042703104161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644448138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json new file mode 100644 index 000000000..5c2382168 --- /dev/null +++ b/data/hfopenllm_v2/meta/grimjim/llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v3-8B/1762652580.186964", + "retrieved_timestamp": "1762652580.186965", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "grimjim/llama-3-Nephilim-v3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "grimjim/llama-3-Nephilim-v3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4173825449806513 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5012671264428366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3989270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3612034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json new file mode 100644 index 000000000..1f394c402 --- /dev/null +++ b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp-8B/1762652580.1947231", + "retrieved_timestamp": "1762652580.194724", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Llama-Hermes-slerp-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "hotmailuser/Llama-Hermes-slerp-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3390470617960345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5310290010444968 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33311170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json new file mode 100644 index 000000000..1cb4537bb --- /dev/null +++ b/data/hfopenllm_v2/meta/hotmailuser/Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp2-8B/1762652580.194975", + "retrieved_timestamp": "1762652580.194976", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Llama-Hermes-slerp2-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "hotmailuser/Llama-Hermes-slerp2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3728440537773109 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265283171967207 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42481250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33793218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json b/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json new file mode 100644 index 000000000..530d21743 --- /dev/null +++ b/data/hfopenllm_v2/meta/hotmailuser/LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_LlamaStock-8B/1762652580.19518", + "retrieved_timestamp": "1762652580.19518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/LlamaStock-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "hotmailuser/LlamaStock-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4249513513034304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5328942883826541 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41293749999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806515957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json b/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json new file mode 100644 index 000000000..51b536a1f --- /dev/null +++ b/data/hfopenllm_v2/meta/huggyllama/llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-13b/1762652580.199647", + "retrieved_timestamp": "1762652580.199648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huggyllama/llama-13b", + "developer": "meta", + "inference_platform": "unknown", + "id": "huggyllama/llama-13b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24105262924595627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39878925581174585 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19522938829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json b/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json new file mode 100644 index 000000000..91d9a0558 --- /dev/null +++ b/data/hfopenllm_v2/meta/huggyllama/llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-65b/1762652580.1999428", + "retrieved_timestamp": "1762652580.199944", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huggyllama/llama-65b", + "developer": "meta", + "inference_platform": "unknown", + "id": "huggyllama/llama-65b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25259311958935626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4702556052882764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35945833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3077626329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 65.286 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json b/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json new file mode 100644 index 000000000..d2d746a90 --- /dev/null +++ b/data/hfopenllm_v2/meta/huggyllama/llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/huggyllama_llama-7b/1762652580.200164", + "retrieved_timestamp": "1762652580.200165", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "huggyllama/llama-7b", + "developer": "meta", + "inference_platform": "unknown", + "id": "huggyllama/llama-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25009530268576263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32773134782898566 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33539583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13131648936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json b/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json new file mode 100644 index 000000000..438fb4d9c --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama31_8B_en_emo_v4/1762652580.202469", + "retrieved_timestamp": "1762652580.202469", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama31_8B_en_emo_v4", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama31_8B_en_emo_v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3042504997850149 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49155384618761383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3642916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3048537234042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "", + "params_billions": 4.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json b/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json new file mode 100644 index 000000000..3b5f26b32 --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_1B_en_emo_v1/1762652580.2027268", + "retrieved_timestamp": "1762652580.2027268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_1B_en_emo_v1", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_1B_en_emo_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44083808738591385 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33802631394113886 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34888541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17611369680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.765 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json new file mode 100644 index 000000000..6d83931f9 --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_1000_stp/1762652580.202935", + "retrieved_timestamp": "1762652580.2029362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_1000_stp", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_1000_stp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7295243287809678 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45218477635502685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3620625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3123337765957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json new file mode 100644 index 000000000..553c1dbea --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_2000_stp/1762652580.203131", + "retrieved_timestamp": "1762652580.203132", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_2000_stp", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_2000_stp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7368681764385165 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45345889848516396 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35269791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3097573138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json new file mode 100644 index 000000000..ed11093b3 --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_300_stp/1762652580.203331", + "retrieved_timestamp": "1762652580.203331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_300_stp", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_300_stp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.725552644760347 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45045681689917494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3620625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3148271276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json new file mode 100644 index 000000000..fbcb8e46b --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_5000_stp/1762652580.203531", + "retrieved_timestamp": "1762652580.203532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_5000_stp", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_5000_stp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7100404703963262 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4567949942342784 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34460416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30668218085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json new file mode 100644 index 000000000..8426649d8 --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v2/1762652580.203742", + "retrieved_timestamp": "1762652580.203743", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5454017562290279 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4283518305582969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34822916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3003656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json new file mode 100644 index 000000000..e04168a67 --- /dev/null +++ b/data/hfopenllm_v2/meta/iFaz/llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v3/1762652580.203954", + "retrieved_timestamp": "1762652580.203954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "iFaz/llama32_3B_en_emo_v3", + "developer": "meta", + "inference_platform": "unknown", + "id": "iFaz/llama32_3B_en_emo_v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5759263199421978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43013596402782367 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35527083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27102726063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json b/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json new file mode 100644 index 000000000..96c11a286 --- /dev/null +++ b/data/hfopenllm_v2/meta/jiangxinyang-shanda/Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/jiangxinyang-shanda_Homer-LLama3-8B/1762652580.2879412", + "retrieved_timestamp": "1762652580.287943", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "jiangxinyang-shanda/Homer-LLama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "jiangxinyang-shanda/Homer-LLama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3991719748046295 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173242047543128 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40562499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json b/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json new file mode 100644 index 000000000..84d8a4ba2 --- /dev/null +++ b/data/hfopenllm_v2/meta/keeeeenw/MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/keeeeenw_MicroLlama/1762652580.3060532", + "retrieved_timestamp": "1762652580.3060539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "keeeeenw/MicroLlama", + "developer": "meta", + "inference_platform": "unknown", + "id": "keeeeenw/MicroLlama" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19853765785892544 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3007313991347165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36981249999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11377992021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.305 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json b/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json new file mode 100644 index 000000000..714c63930 --- /dev/null +++ b/data/hfopenllm_v2/meta/kevin009/llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kevin009_llamaRAGdrama/1762652580.3065941", + "retrieved_timestamp": "1762652580.3065941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kevin009/llamaRAGdrama", + "developer": "meta", + "inference_platform": "unknown", + "id": "kevin009/llamaRAGdrama" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2598372318780835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4007385667099335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43157291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27235704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json b/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json new file mode 100644 index 000000000..302726816 --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-3-8b-stock-merge/1762652580.307331", + "retrieved_timestamp": "1762652580.307332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-3-8b-stock-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-3-8b-stock-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48117993590340297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5162255701726589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16163141993957703 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39458333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37998670212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json b/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json new file mode 100644 index 000000000..9812ea48d --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-breadcrumbs-ties-merge/1762652580.307606", + "retrieved_timestamp": "1762652580.307607", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-breadcrumbs-ties-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-breadcrumbs-ties-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22051933314716063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415928172799896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44344791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3171542553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json b/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json new file mode 100644 index 000000000..7c647dbdd --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-evolve-ties-best-merge/1762652580.307874", + "retrieved_timestamp": "1762652580.3078752", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-evolve-ties-best-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-evolve-ties-best-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6743950495795601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413565104914732 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39455208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859707446808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json new file mode 100644 index 000000000..789d635cb --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-0.5-1-merge/1762652580.3081899", + "retrieved_timestamp": "1762652580.308191", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-linear-0.5-0.5-1-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-linear-0.5-0.5-1-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48122980358781364 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5643013649244941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41427083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38331117021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json new file mode 100644 index 000000000..0f55ebbaa --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-1-0.5-merge/1762652580.308497", + "retrieved_timestamp": "1762652580.308498", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-linear-0.5-1-0.5-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-linear-0.5-1-0.5-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5031616111916382 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5950766502131658 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4171875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3690159574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json b/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json new file mode 100644 index 000000000..713403a73 --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-1-0.5-0.5-merge/1762652580.308746", + "retrieved_timestamp": "1762652580.308747", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-linear-1-0.5-0.5-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-linear-1-0.5-0.5-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45145436331156885 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5526017944110775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.363530585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json b/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json new file mode 100644 index 000000000..186939cad --- /dev/null +++ b/data/hfopenllm_v2/meta/khoantap/llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khoantap_llama-slerp-merge/1762652580.308971", + "retrieved_timestamp": "1762652580.3089721", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khoantap/llama-slerp-merge", + "developer": "meta", + "inference_platform": "unknown", + "id": "khoantap/llama-slerp-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49799088640363126 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5782782780315171 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40531249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3677692819148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json b/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json new file mode 100644 index 000000000..0870e79df --- /dev/null +++ b/data/hfopenllm_v2/meta/khulaifi95/Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/1762652580.309421", + "retrieved_timestamp": "1762652580.309421", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", + "developer": "meta", + "inference_platform": "unknown", + "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.583170432230925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4789526925494476 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3379375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3100066489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json b/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json new file mode 100644 index 000000000..da58c0d5e --- /dev/null +++ b/data/hfopenllm_v2/meta/laislemke/LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/laislemke_LLaMA-2-vicuna-7b-slerp/1762652580.311907", + "retrieved_timestamp": "1762652580.311908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "laislemke/LLaMA-2-vicuna-7b-slerp", + "developer": "meta", + "inference_platform": "unknown", + "id": "laislemke/LLaMA-2-vicuna-7b-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29320979445648654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29862163052356266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13422539893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json b/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json new file mode 100644 index 000000000..e14a86a09 --- /dev/null +++ b/data/hfopenllm_v2/meta/lemon07r/Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_Llama-3-RedMagic4-8B/1762652580.318728", + "retrieved_timestamp": "1762652580.318729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/Llama-3-RedMagic4-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "lemon07r/Llama-3-RedMagic4-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4864005283758206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42560489470390417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3766354166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3676030585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json b/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json new file mode 100644 index 000000000..5a576bef2 --- /dev/null +++ b/data/hfopenllm_v2/meta/lemon07r/llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lemon07r_llama-3-NeuralMahou-8b/1762652580.319005", + "retrieved_timestamp": "1762652580.319006", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lemon07r/llama-3-NeuralMahou-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "lemon07r/llama-3-NeuralMahou-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49009738604680025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41841123683301523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3690159574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json new file mode 100644 index 000000000..261385d6a --- /dev/null +++ b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/1762652580.32158", + "retrieved_timestamp": "1762652580.32158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", + "developer": "meta", + "inference_platform": "unknown", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5817464327983085 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4714219934773132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3221875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33095079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json new file mode 100644 index 000000000..10ceb771f --- /dev/null +++ b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/1762652580.3218", + "retrieved_timestamp": "1762652580.321801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", + "developer": "meta", + "inference_platform": "unknown", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6249107922534431 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47074584910573014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35158333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36136968085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json new file mode 100644 index 000000000..66d654fbb --- /dev/null +++ b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/1762652580.322012", + "retrieved_timestamp": "1762652580.322013", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", + "developer": "meta", + "inference_platform": "unknown", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6636535503574958 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4864641205580417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35660416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json new file mode 100644 index 000000000..c2a0a0af3 --- /dev/null +++ b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/1762652580.322237", + "retrieved_timestamp": "1762652580.322238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", + "developer": "meta", + "inference_platform": "unknown", + "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6687245397766814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48333166095856117 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3816875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37691156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json new file mode 100644 index 000000000..b2d6ee97d --- /dev/null +++ b/data/hfopenllm_v2/meta/lightblue/suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual/1762652580.321283", + "retrieved_timestamp": "1762652580.321284", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "lightblue/suzume-llama-3-8B-multilingual", + "developer": "meta", + "inference_platform": "unknown", + "id": "lightblue/suzume-llama-3-8B-multilingual" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6678003253589365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49499524187359745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39768749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33834773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json b/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json new file mode 100644 index 000000000..8060f56e3 --- /dev/null +++ b/data/hfopenllm_v2/meta/m42-health/Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/m42-health_Llama3-Med42-70B/1762652580.328667", + "retrieved_timestamp": "1762652580.328667", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "m42-health/Llama3-Med42-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "m42-health/Llama3-Med42-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6291074349392944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6687891109485058 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2258308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46289583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4962599734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json b/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json new file mode 100644 index 000000000..dbe17017b --- /dev/null +++ b/data/hfopenllm_v2/meta/maldv/badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_badger-kappa-llama-3-8b/1762652580.331178", + "retrieved_timestamp": "1762652580.331179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/badger-kappa-llama-3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "maldv/badger-kappa-llama-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46946435457918323 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084927997756815 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3765104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3695146276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json b/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json new file mode 100644 index 000000000..19ffcd821 --- /dev/null +++ b/data/hfopenllm_v2/meta/maldv/badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_badger-lambda-llama-3-8b/1762652580.331519", + "retrieved_timestamp": "1762652580.33152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/badger-lambda-llama-3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "maldv/badger-lambda-llama-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4860758343417687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49634866510444836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37666223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json b/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json new file mode 100644 index 000000000..d7df327f2 --- /dev/null +++ b/data/hfopenllm_v2/meta/maldv/badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_badger-mu-llama-3-8b/1762652580.3317509", + "retrieved_timestamp": "1762652580.3317518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/badger-mu-llama-3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "maldv/badger-mu-llama-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49194581488229006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.514287576852281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35545833333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3673537234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json b/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json new file mode 100644 index 000000000..13b8536b0 --- /dev/null +++ b/data/hfopenllm_v2/meta/maldv/badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/maldv_badger-writer-llama-3-8b/1762652580.332005", + "retrieved_timestamp": "1762652580.332005", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "maldv/badger-writer-llama-3-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "maldv/badger-writer-llama-3-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5303140112678804 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4863893856673737 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35809375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3759973404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json b/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json new file mode 100644 index 000000000..88778d7e9 --- /dev/null +++ b/data/hfopenllm_v2/meta/mattshumer/Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mattshumer_Reflection-Llama-3.1-70B/1762652580.341989", + "retrieved_timestamp": "1762652580.341989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mattshumer/Reflection-Llama-3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "mattshumer/Reflection-Llama-3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00452133671990319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.645001286484342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45765625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4955119680851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json new file mode 100644 index 000000000..85c7bee4d --- /dev/null +++ b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.1-MedIT-SUN-8B/1762652580.342782", + "retrieved_timestamp": "1762652580.342783", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.1-MedIT-SUN-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7837293935646308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5186924904597405 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20921450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40562499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3916223404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json new file mode 100644 index 000000000..787458c03 --- /dev/null +++ b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/1762652580.3434849", + "retrieved_timestamp": "1762652580.343486", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", + "developer": "meta", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28139447776344545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017752699243885 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41033333333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1344747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.209 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json new file mode 100644 index 000000000..d689725d3 --- /dev/null +++ b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/1762652580.343692", + "retrieved_timestamp": "1762652580.343693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", + "developer": "meta", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25009530268576263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3161124673749052 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4022395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13572140957446807 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.209 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json new file mode 100644 index 000000000..7139fb72d --- /dev/null +++ b/data/hfopenllm_v2/meta/meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/1762652580.343897", + "retrieved_timestamp": "1762652580.343898", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5636865738462834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3390826682107771 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32094791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15425531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.472 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json new file mode 100644 index 000000000..cc55bd099 --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-hf/1762652580.3493812", + "retrieved_timestamp": "1762652580.349382", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-13b-hf", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-13b-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24824687385027283 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41256242233835055 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23778257978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json new file mode 100644 index 000000000..01d7666c2 --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-hf/1762652580.3500109", + "retrieved_timestamp": "1762652580.3500118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-70b-hf", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-70b-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2406780675274937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5472591190449342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41235416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37175864361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json b/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json new file mode 100644 index 000000000..fc3674ed4 --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-hf/1762652580.350465", + "retrieved_timestamp": "1762652580.350466", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-2-7b-hf", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-2-7b-hf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2518938638368418 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34961958199821835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37006249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18608710106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json new file mode 100644 index 000000000..dcb3c82f4 --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B/1762652580.350682", + "retrieved_timestamp": "1762652580.350682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.1-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.1-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16843752354862876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.626006918317161 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18429003021148038 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4571875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4654255319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json new file mode 100644 index 000000000..baa573bdf --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B/1762652580.351093", + "retrieved_timestamp": "1762652580.351093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12459828809780273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46595905446007296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3811875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32878989361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json new file mode 100644 index 000000000..93883390c --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B/1762652580.3515048", + "retrieved_timestamp": "1762652580.351506", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.2-1B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14777900415342402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31149540964608097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22818791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12034574468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.24 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json new file mode 100644 index 000000000..e8175dd9a --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B/1762652580.351924", + "retrieved_timestamp": "1762652580.351925", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Llama-3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Llama-3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13374069690643048 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3905117116991059 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35771875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2487533244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json new file mode 100644 index 000000000..fe2fa525f --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B/1762652580.352541", + "retrieved_timestamp": "1762652580.352541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Meta-Llama-3-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Meta-Llama-3-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1603190645265673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6461074599904467 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4709109042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json new file mode 100644 index 000000000..e0720a7a2 --- /dev/null +++ b/data/hfopenllm_v2/meta/meta-llama/Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B/1762652580.352957", + "retrieved_timestamp": "1762652580.352957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meta-llama/Meta-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "meta-llama/Meta-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14550614591506092 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4597905195240255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36140625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32097739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json b/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json new file mode 100644 index 000000000..e5560dd77 --- /dev/null +++ b/data/hfopenllm_v2/meta/migtissera/Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-70B-Synthia-v3.5/1762652580.358073", + "retrieved_timestamp": "1762652580.3580742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Llama-3-70B-Synthia-v3.5", + "developer": "meta", + "inference_platform": "unknown", + "id": "migtissera/Llama-3-70B-Synthia-v3.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6076499244227538 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6488638026271278 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49219791666666673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4658410904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json b/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json new file mode 100644 index 000000000..f1fcd1968 --- /dev/null +++ b/data/hfopenllm_v2/meta/migtissera/Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-8B-Synthia-v3.5/1762652580.358322", + "retrieved_timestamp": "1762652580.358322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Llama-3-8B-Synthia-v3.5", + "developer": "meta", + "inference_platform": "unknown", + "id": "migtissera/Llama-3-8B-Synthia-v3.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069582042314393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4887940933660044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40438541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30302526595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json b/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json new file mode 100644 index 000000000..b7df556a9 --- /dev/null +++ b/data/hfopenllm_v2/meta/mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/1762652580.360158", + "retrieved_timestamp": "1762652580.360159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", + "developer": "meta", + "inference_platform": "unknown", + "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13881168632561602 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067536965504715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3792083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11062167553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json b/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json new file mode 100644 index 000000000..54943f8df --- /dev/null +++ b/data/hfopenllm_v2/meta/mkurman/llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mkurman_llama-3.2-MEDIT-3B-o1/1762652580.365804", + "retrieved_timestamp": "1762652580.3658051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mkurman/llama-3.2-MEDIT-3B-o1", + "developer": "meta", + "inference_platform": "unknown", + "id": "mkurman/llama-3.2-MEDIT-3B-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43816517950150047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43996584807961553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27410239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.607 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json b/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json new file mode 100644 index 000000000..0d376bb51 --- /dev/null +++ b/data/hfopenllm_v2/meta/mkxu/llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-po1/1762652580.3669372", + "retrieved_timestamp": "1762652580.366938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mkxu/llama-3-8b-po1", + "developer": "meta", + "inference_platform": "unknown", + "id": "mkxu/llama-3-8b-po1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4081149128756145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49760854852246356 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3562167553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json new file mode 100644 index 000000000..06379d456 --- /dev/null +++ b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v2/1762652580.3680582", + "retrieved_timestamp": "1762652580.3680582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/ChimeraLlama-3-8B-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "mlabonne/ChimeraLlama-3-8B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44688315890725494 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045597361952603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3568816489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json new file mode 100644 index 000000000..031237108 --- /dev/null +++ b/data/hfopenllm_v2/meta/mlabonne/ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v3/1762652580.3683012", + "retrieved_timestamp": "1762652580.3683012", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/ChimeraLlama-3-8B-v3", + "developer": "meta", + "inference_platform": "unknown", + "id": "mlabonne/ChimeraLlama-3-8B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44078821970150317 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49781902726529204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08836858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36685505319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json b/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json new file mode 100644 index 000000000..54ce54271 --- /dev/null +++ b/data/hfopenllm_v2/meta/mlabonne/Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/1762652580.368906", + "retrieved_timestamp": "1762652580.368906", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", + "developer": "meta", + "inference_platform": "unknown", + "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34244360518978534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6693171063183693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36577181208053694 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5029270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4679188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json b/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json new file mode 100644 index 000000000..fa4bed186 --- /dev/null +++ b/data/hfopenllm_v2/meta/mlabonne/OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_OrpoLlama-3-8B/1762652580.369958", + "retrieved_timestamp": "1762652580.3699589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/OrpoLlama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "mlabonne/OrpoLlama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36527524745453177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424079063503051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2705285904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json b/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json new file mode 100644 index 000000000..7820dd974 --- /dev/null +++ b/data/hfopenllm_v2/meta/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/1762652580.370961", + "retrieved_timestamp": "1762652580.370962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4648931501748693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6541763652331517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4140625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5224401595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json b/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json new file mode 100644 index 000000000..35a5a6374 --- /dev/null +++ b/data/hfopenllm_v2/meta/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/1762652580.371218", + "retrieved_timestamp": "1762652580.371218", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370396104558128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34730320150504124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3285498489425982 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33955208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2198304521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json b/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json new file mode 100644 index 000000000..38a1fd16d --- /dev/null +++ b/data/hfopenllm_v2/meta/mukaj/Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mukaj_Llama-3.1-Hawkish-8B/1762652580.3748438", + "retrieved_timestamp": "1762652580.374845", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mukaj/Llama-3.1-Hawkish-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "mukaj/Llama-3.1-Hawkish-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6720468357291984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4883822828416351 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39672916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33311170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json b/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json new file mode 100644 index 000000000..f5119fa88 --- /dev/null +++ b/data/hfopenllm_v2/meta/nbeerbower/Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/1762652580.379643", + "retrieved_timestamp": "1762652580.379644", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7228797368759337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6825051293384551 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4681666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json b/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json new file mode 100644 index 000000000..705f4dbbc --- /dev/null +++ b/data/hfopenllm_v2/meta/nbeerbower/Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/1762652580.379898", + "retrieved_timestamp": "1762652580.3798988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7092159913474027 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6660891255994471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4736535904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json b/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json new file mode 100644 index 000000000..e5f1552ae --- /dev/null +++ b/data/hfopenllm_v2/meta/nbeerbower/llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama-3-gutenberg-8B/1762652580.3850691", + "retrieved_timestamp": "1762652580.385074", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/llama-3-gutenberg-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nbeerbower/llama-3-gutenberg-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371910973993448 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49936002561994197 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json new file mode 100644 index 000000000..db13ce9d0 --- /dev/null +++ b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-cc-8B/1762652580.385431", + "retrieved_timestamp": "1762652580.385432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/llama3.1-cc-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nbeerbower/llama3.1-cc-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5068086011782071 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4871187428614386 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38851041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3346908244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json new file mode 100644 index 000000000..412b891fd --- /dev/null +++ b/data/hfopenllm_v2/meta/nbeerbower/llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-kartoffeldes-70B/1762652580.385698", + "retrieved_timestamp": "1762652580.385699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/llama3.1-kartoffeldes-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nbeerbower/llama3.1-kartoffeldes-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8230218043679659 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6893878613110068 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217522658610272 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46460416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4988364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json b/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json new file mode 100644 index 000000000..6dc445014 --- /dev/null +++ b/data/hfopenllm_v2/meta/necva/IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/necva_IE-cont-Llama3.1-8B/1762652580.3888798", + "retrieved_timestamp": "1762652580.388881", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "necva/IE-cont-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "necva/IE-cont-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20490742341431845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json b/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json new file mode 100644 index 000000000..983249bc6 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_Llama3.1-MFANN-8b/1762652580.3921962", + "retrieved_timestamp": "1762652580.3921971", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/Llama3.1-MFANN-8b", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/Llama3.1-MFANN-8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29695651981187693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4281154680742545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33790625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27252327127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json new file mode 100644 index 000000000..8b06083b2 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/1762652580.3924491", + "retrieved_timestamp": "1762652580.39245", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4209796672828096 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49237606236472237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37276041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222739361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json new file mode 100644 index 000000000..c11689bc3 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/1762652580.3926558", + "retrieved_timestamp": "1762652580.3926558", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4238021782204551 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4914021594225444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37406249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34898603723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json new file mode 100644 index 000000000..17effa7de --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/1762652580.39286", + "retrieved_timestamp": "1762652580.392861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41688275996577967 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4908971108837563 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38209374999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35164561170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json new file mode 100644 index 000000000..62b2d4d30 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/1762652580.393064", + "retrieved_timestamp": "1762652580.393065", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4328947193446721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951892200623516 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3444980053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json new file mode 100644 index 000000000..1f2852384 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/1762652580.393313", + "retrieved_timestamp": "1762652580.393313", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42934746472692453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49675121796238325 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3531416223404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json new file mode 100644 index 000000000..098ad84d8 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1762652580.3935192", + "retrieved_timestamp": "1762652580.39352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41281134057633745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49782535474346185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37542708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527260638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json new file mode 100644 index 000000000..bedbff83c --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-Abliterated-SLERP/1762652580.394179", + "retrieved_timestamp": "1762652580.39418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-llama3.1-Abliterated-SLERP", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25906262051357065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45744999460878283 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3809166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2928025265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json new file mode 100644 index 000000000..23e2ebd21 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/1762652580.394599", + "retrieved_timestamp": "1762652580.3946", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4201551882338861 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.492068920606988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3543051861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json new file mode 100644 index 000000000..c2846d274 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/1762652580.394387", + "retrieved_timestamp": "1762652580.394388", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37993856301280604 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49305765460927126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35305851063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json new file mode 100644 index 000000000..300d4c5a0 --- /dev/null +++ b/data/hfopenllm_v2/meta/netcat420/MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-v2/1762652580.3948102", + "retrieved_timestamp": "1762652580.394811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-llama3.1-abliterated-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "netcat420/MFANN-llama3.1-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4429114748866341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4940829733015402 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3845416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3490691489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json b/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json new file mode 100644 index 000000000..7a4062a16 --- /dev/null +++ b/data/hfopenllm_v2/meta/ngxson/MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-1B-Llama-3.2/1762652580.4050229", + "retrieved_timestamp": "1762652580.4050229", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ngxson/MiniThinky-1B-Llama-3.2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ngxson/MiniThinky-1B-Llama-3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2771479673931834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31422650382721545 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34336458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json b/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json new file mode 100644 index 000000000..24296b738 --- /dev/null +++ b/data/hfopenllm_v2/meta/ngxson/MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-v2-1B-Llama-3.2/1762652580.405281", + "retrieved_timestamp": "1762652580.405282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ngxson/MiniThinky-v2-1B-Llama-3.2", + "developer": "meta", + "inference_platform": "unknown", + "id": "ngxson/MiniThinky-v2-1B-Llama-3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2963071317437732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32051111358951634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3356145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1116190159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json new file mode 100644 index 000000000..909aeaff2 --- /dev/null +++ b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r16-0to512tokens-test/1762652580.4104571", + "retrieved_timestamp": "1762652580.410458", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/llama-math-1b-r16-0to512tokens-test", + "developer": "meta", + "inference_platform": "unknown", + "id": "noname0202/llama-math-1b-r16-0to512tokens-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5469753587148765 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34884166022601404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3143125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17278922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json new file mode 100644 index 000000000..413ac1a67 --- /dev/null +++ b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-0to512tokens-test/1762652580.410711", + "retrieved_timestamp": "1762652580.410711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/llama-math-1b-r32-0to512tokens-test", + "developer": "meta", + "inference_platform": "unknown", + "id": "noname0202/llama-math-1b-r32-0to512tokens-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5682577782505973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3495183139510159 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32094791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17603058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json new file mode 100644 index 000000000..8262d7c62 --- /dev/null +++ b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-test/1762652580.410917", + "retrieved_timestamp": "1762652580.410918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/llama-math-1b-r32-test", + "developer": "meta", + "inference_platform": "unknown", + "id": "noname0202/llama-math-1b-r32-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5819215237791282 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485960127764988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31564583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17810837765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json new file mode 100644 index 000000000..6d15fe9e5 --- /dev/null +++ b/data/hfopenllm_v2/meta/noname0202/llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r8-512tokens-test/1762652580.411124", + "retrieved_timestamp": "1762652580.411125", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/llama-math-1b-r8-512tokens-test", + "developer": "meta", + "inference_platform": "unknown", + "id": "noname0202/llama-math-1b-r8-512tokens-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5791987482103043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3495762462148306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31694791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17528257978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json b/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json new file mode 100644 index 000000000..ae59004de --- /dev/null +++ b/data/hfopenllm_v2/meta/nvidia/Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Minitron-4B-Depth-Base/1762652580.4147708", + "retrieved_timestamp": "1762652580.414772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", + "developer": "meta", + "inference_platform": "unknown", + "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16069362624502986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4170704193104893 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40106250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2798371010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.02 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json b/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json new file mode 100644 index 000000000..07ff88074 --- /dev/null +++ b/data/hfopenllm_v2/meta/nvidia/OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_OpenMath2-Llama3.1-8B/1762652580.416384", + "retrieved_timestamp": "1762652580.416384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/OpenMath2-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "nvidia/OpenMath2-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23305939352030391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40955241401694514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34355208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15533577127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json b/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json new file mode 100644 index 000000000..a21fde5c4 --- /dev/null +++ b/data/hfopenllm_v2/meta/ontocord/Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/1762652580.417561", + "retrieved_timestamp": "1762652580.417561", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", + "developer": "meta", + "inference_platform": "unknown", + "id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2765484470094904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31150775306414563 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.345875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11319813829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.498 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json new file mode 100644 index 000000000..67b46bbba --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.4263492", + "retrieved_timestamp": "1762652580.42635", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/Llama-FinSent-S", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/Llama-FinSent-S" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2163980460733077 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3169254117559263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11336436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json new file mode 100644 index 000000000..7f183eed5 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.426095", + "retrieved_timestamp": "1762652580.426095", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/Llama-FinSent-S", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/Llama-FinSent-S" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21187670935340452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31562055310321474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3832395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11303191489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json b/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json new file mode 100644 index 000000000..57ecad51c --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned10-llama-3.2-3B/1762652580.426529", + "retrieved_timestamp": "1762652580.4265301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned10-llama-3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned10-llama-3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17762980004166723 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340421117164456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3721666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16397938829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.001 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json b/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json new file mode 100644 index 000000000..effa46760 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-1b/1762652580.426731", + "retrieved_timestamp": "1762652580.426732", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned20-llama-1b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned20-llama-1b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19936213690784896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30313627830972034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36314583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.075 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json b/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json new file mode 100644 index 000000000..b0a9fd454 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-3.2-3b/1762652580.4269419", + "retrieved_timestamp": "1762652580.426943", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned20-llama-3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned20-llama-3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17887870849346402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32478483912909756 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12799202127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.79 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json new file mode 100644 index 000000000..3c37e7dd7 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-1b/1762652580.427145", + "retrieved_timestamp": "1762652580.427145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned40-llama-1b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned40-llama-1b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22843832143157933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29691563801419935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10821143617021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json new file mode 100644 index 000000000..19cf0bb38 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-1B/1762652580.427387", + "retrieved_timestamp": "1762652580.427387", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned40-llama-3.2-1B", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned40-llama-3.2-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22663976028050017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2982489713475327 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43523958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11145279255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.914 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json new file mode 100644 index 000000000..c5d52fea5 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-3b/1762652580.4275908", + "retrieved_timestamp": "1762652580.4275908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned40-llama-3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned40-llama-3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21829634259320824 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31671170280977073 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3539375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11768617021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.367 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json b/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json new file mode 100644 index 000000000..787cc1884 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-1b/1762652580.4277859", + "retrieved_timestamp": "1762652580.4277859", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned60-llama-1b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned60-llama-1b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18285039251408486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3016193474185398 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40879166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11727061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.753 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json b/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json new file mode 100644 index 000000000..cb3ab8472 --- /dev/null +++ b/data/hfopenllm_v2/meta/oopere/pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-3.2-3b/1762652580.42798", + "retrieved_timestamp": "1762652580.4279811", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oopere/pruned60-llama-3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "oopere/pruned60-llama-3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1824758307956223 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31662597093352013 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3633333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.944 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json b/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json new file mode 100644 index 000000000..56c8e1b91 --- /dev/null +++ b/data/hfopenllm_v2/meta/orai-nlp/Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/orai-nlp_Llama-eus-8B/1762652580.43225", + "retrieved_timestamp": "1762652580.432275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "orai-nlp/Llama-eus-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "orai-nlp/Llama-eus-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21612321972366655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4418245490788701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3918854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30576795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json new file mode 100644 index 000000000..0b8983a5b --- /dev/null +++ b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Base/1762652580.442863", + "retrieved_timestamp": "1762652580.4428642", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", + "developer": "meta", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5322123077877808 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033213133882991 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4222708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33294547872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json new file mode 100644 index 000000000..743518d9c --- /dev/null +++ b/data/hfopenllm_v2/meta/princeton-nlp/Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Base/1762652580.443676", + "retrieved_timestamp": "1762652580.443677", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", + "developer": "meta", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5200722970606879 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49271325981523906 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4340520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347739361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json new file mode 100644 index 000000000..8c448d76e --- /dev/null +++ b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-1.3B/1762652580.4538639", + "retrieved_timestamp": "1762652580.453865", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Sheared-LLaMA-1.3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "princeton-nlp/Sheared-LLaMA-1.3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2197702097102355 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31970467392464424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11710438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json new file mode 100644 index 000000000..4aba6265a --- /dev/null +++ b/data/hfopenllm_v2/meta/princeton-nlp/Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-2.7B/1762652580.4540951", + "retrieved_timestamp": "1762652580.4540958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Sheared-LLaMA-2.7B", + "developer": "meta", + "inference_platform": "unknown", + "id": "princeton-nlp/Sheared-LLaMA-2.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24165214962964932 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32586855691245953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11868351063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 2.7 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json b/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json new file mode 100644 index 000000000..f27f7379c --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Llama-3-8B-Preview/1762652580.459939", + "retrieved_timestamp": "1762652580.459939", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Deepthink-Llama-3-8B-Preview", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29553252037926037 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4664510845126107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549848942598187 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37070833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2738530585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json new file mode 100644 index 000000000..a9c5ee4df --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-3B-Math-Oct/1762652580.464829", + "retrieved_timestamp": "1762652580.46483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-3.2-3B-Math-Oct", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-3.2-3B-Math-Oct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4585233846194763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4371840952508727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34698958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911402925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json new file mode 100644 index 000000000..b30f58f36 --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-6B-AlgoCode/1762652580.465046", + "retrieved_timestamp": "1762652580.465046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-3.2-6B-AlgoCode", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-3.2-6B-AlgoCode" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21357553513566227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37477424449567703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2869127516778524 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40134374999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17977061170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.339 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json new file mode 100644 index 000000000..9ffbb9356 --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-8B-Distill-CoT/1762652580.465258", + "retrieved_timestamp": "1762652580.465258", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-8B-Distill-CoT", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-8B-Distill-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3341511633576688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4297620873695442 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3719791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.273188164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json new file mode 100644 index 000000000..8f3dce9ce --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-1B/1762652580.4655502", + "retrieved_timestamp": "1762652580.4655511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-Deepsync-1B", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-Deepsync-1B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3570071853792382 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33856262083940014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35651041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17378656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json new file mode 100644 index 000000000..7cad03365 --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-3B/1762652580.465787", + "retrieved_timestamp": "1762652580.465788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-Deepsync-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-Deepsync-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4302218114602588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4291521655271033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33238541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3031083776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json new file mode 100644 index 000000000..c67cac18a --- /dev/null +++ b/data/hfopenllm_v2/meta/prithivMLmods/Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Express.1-Math/1762652580.466016", + "retrieved_timestamp": "1762652580.466017", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-Express.1-Math", + "developer": "meta", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-Express.1-Math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5084320713484665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33638140090435265 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.055891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31434375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16098736702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json b/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json new file mode 100644 index 000000000..f3d5f02d0 --- /dev/null +++ b/data/hfopenllm_v2/meta/pszemraj/Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pszemraj_Llama-3-6.3b-v0.1/1762652580.4812942", + "retrieved_timestamp": "1762652580.481295", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pszemraj/Llama-3-6.3b-v0.1", + "developer": "meta", + "inference_platform": "unknown", + "id": "pszemraj/Llama-3-6.3b-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10438968603305895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41968070468284147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3908333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2839926861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json b/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json new file mode 100644 index 000000000..c6724893c --- /dev/null +++ b/data/hfopenllm_v2/meta/qingy2019/LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_LLaMa_3.2_3B_Catalysts/1762652580.4818308", + "retrieved_timestamp": "1762652580.481832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/LLaMa_3.2_3B_Catalysts", + "developer": "meta", + "inference_platform": "unknown", + "id": "qingy2019/LLaMa_3.2_3B_Catalysts" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.499239794855428 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44681268798954793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12915407854984895 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37877083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30078125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json b/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json new file mode 100644 index 000000000..046aab852 --- /dev/null +++ b/data/hfopenllm_v2/meta/qingy2019/OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_OpenMath2-Llama3.1-8B/1762652580.482083", + "retrieved_timestamp": "1762652580.482084", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/OpenMath2-Llama3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "qingy2019/OpenMath2-Llama3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23305939352030391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40955241401694514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2673716012084592 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34355208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15533577127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json b/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json new file mode 100644 index 000000000..32ded2d85 --- /dev/null +++ b/data/hfopenllm_v2/meta/refuelai/Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/refuelai_Llama-3-Refueled/1762652580.494146", + "retrieved_timestamp": "1762652580.494147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "refuelai/Llama-3-Refueled", + "developer": "meta", + "inference_platform": "unknown", + "id": "refuelai/Llama-3-Refueled" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4619952836252255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5870766201705051 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30950797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json new file mode 100644 index 000000000..5b07ceb8e --- /dev/null +++ b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.495657", + "retrieved_timestamp": "1762652580.495657", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "riaz/FineLlama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "riaz/FineLlama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43734070045257695 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45857296498013483 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3762916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29637632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json new file mode 100644 index 000000000..608ac87d5 --- /dev/null +++ b/data/hfopenllm_v2/meta/riaz/FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.4959512", + "retrieved_timestamp": "1762652580.495952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "riaz/FineLlama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "riaz/FineLlama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.413660199382084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.456451981676995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37762500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29778922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json b/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json new file mode 100644 index 000000000..75ceb415d --- /dev/null +++ b/data/hfopenllm_v2/meta/rombodawg/rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Llama3-8B/1762652580.5000498", + "retrieved_timestamp": "1762652580.500051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/rombos_Replete-Coder-Llama3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "rombodawg/rombos_Replete-Coder-Llama3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4714125187834945 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32762771025266835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39663541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13347739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json new file mode 100644 index 000000000..fcfc40218 --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-CPO/1762652580.502833", + "retrieved_timestamp": "1762652580.502836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-CPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-CPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1545488193548673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3457919655499851 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40482291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1605718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json new file mode 100644 index 000000000..4ddde947e --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-IPO/1762652580.503558", + "retrieved_timestamp": "1762652580.5035589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-IPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-IPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17685518867715438 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474552716912811 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16173537234042554 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json new file mode 100644 index 000000000..eec0d38f8 --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-KTO/1762652580.503802", + "retrieved_timestamp": "1762652580.5038028", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-KTO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15284999357260956 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35007577568366255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41669791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1636469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json new file mode 100644 index 000000000..974df6a73 --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SPO/1762652580.504033", + "retrieved_timestamp": "1762652580.504034", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-SPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-SPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15667207453999832 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33834029554844597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3874270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17569813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json new file mode 100644 index 000000000..b06f6b67a --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SimPO/1762652580.504319", + "retrieved_timestamp": "1762652580.50432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-SimPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1658643510330368 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34891553101294254 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40069791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16414561170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json b/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json new file mode 100644 index 000000000..53e8dbaaf --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersaleh/Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama3/1762652580.504582", + "retrieved_timestamp": "1762652580.504583", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama3", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersaleh/Llama3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3320777758569484 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47821899796340944 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39334375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.316156914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json new file mode 100644 index 000000000..0596b6cfe --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersalehk/Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-001-300/1762652580.504826", + "retrieved_timestamp": "1762652580.504826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersalehk/Llama3-001-300", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersalehk/Llama3-001-300" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178643776291351 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47445771982516544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40639583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158244680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json new file mode 100644 index 000000000..6fb7a5b6c --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersalehk/Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-SimPO/1762652580.505101", + "retrieved_timestamp": "1762652580.5051022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersalehk/Llama3-SimPO", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersalehk/Llama3-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36420142998355476 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48735382942408356 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40459375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156582446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json new file mode 100644 index 000000000..2a0c5b438 --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersalehk/Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_001_200/1762652580.505313", + "retrieved_timestamp": "1762652580.505314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersalehk/Llama3_001_200", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersalehk/Llama3_001_200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.321836061649756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727921518419169 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4037291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31831781914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json b/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json new file mode 100644 index 000000000..420e22a7a --- /dev/null +++ b/data/hfopenllm_v2/meta/sabersalehk/Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_01_300/1762652580.505522", + "retrieved_timestamp": "1762652580.505523", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersalehk/Llama3_01_300", + "developer": "meta", + "inference_platform": "unknown", + "id": "sabersalehk/Llama3_01_300" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2958827023408999 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4691387139601247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40648958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31241688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json b/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json new file mode 100644 index 000000000..2ef1a8c79 --- /dev/null +++ b/data/hfopenllm_v2/meta/sakhan10/quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sakhan10_quantized_open_llama_3b_v2/1762652580.507647", + "retrieved_timestamp": "1762652580.507648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sakhan10/quantized_open_llama_3b_v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "sakhan10/quantized_open_llama_3b_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18722212618075595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3019800780121471 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10954122340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json new file mode 100644 index 000000000..d7715792e --- /dev/null +++ b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-70B-PlumChat/1762652580.5115242", + "retrieved_timestamp": "1762652580.5115242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/Llama3.1-70B-PlumChat", + "developer": "meta", + "inference_platform": "unknown", + "id": "sequelbox/Llama3.1-70B-PlumChat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5616131863455631 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6752815345736151 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028700906344411 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47737500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.516373005319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json new file mode 100644 index 000000000..f07ce8345 --- /dev/null +++ b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-MOTH/1762652580.511786", + "retrieved_timestamp": "1762652580.511787", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/Llama3.1-8B-MOTH", + "developer": "meta", + "inference_platform": "unknown", + "id": "sequelbox/Llama3.1-8B-MOTH" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244938984117696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.490246673015408 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3689166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338597074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json new file mode 100644 index 000000000..4ef2105c1 --- /dev/null +++ b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumChat/1762652580.512009", + "retrieved_timestamp": "1762652580.51201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/Llama3.1-8B-PlumChat", + "developer": "meta", + "inference_platform": "unknown", + "id": "sequelbox/Llama3.1-8B-PlumChat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42427647530773904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873291395699702 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03625377643504532 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3754583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21268284574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json new file mode 100644 index 000000000..ac6c424e2 --- /dev/null +++ b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumCode/1762652580.512235", + "retrieved_timestamp": "1762652580.512235", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/Llama3.1-8B-PlumCode", + "developer": "meta", + "inference_platform": "unknown", + "id": "sequelbox/Llama3.1-8B-PlumCode" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20448299401144518 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368086861425416 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37734375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23354388297872342 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json new file mode 100644 index 000000000..b28e12f17 --- /dev/null +++ b/data/hfopenllm_v2/meta/sequelbox/Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumMath/1762652580.512456", + "retrieved_timestamp": "1762652580.512456", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sequelbox/Llama3.1-8B-PlumMath", + "developer": "meta", + "inference_platform": "unknown", + "id": "sequelbox/Llama3.1-8B-PlumMath" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.224241678745728 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40323023090048143 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39185416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29753989361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json b/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json new file mode 100644 index 000000000..21c392f8d --- /dev/null +++ b/data/hfopenllm_v2/meta/sethuiyer/LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/1762652580.5134048", + "retrieved_timestamp": "1762652580.513406", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", + "developer": "meta", + "inference_platform": "unknown", + "id": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6051022398347496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49813698712445653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38199999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2999501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json b/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json new file mode 100644 index 000000000..30c546fb4 --- /dev/null +++ b/data/hfopenllm_v2/meta/skumar9/Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/skumar9_Llama-medx_v2/1762652580.517576", + "retrieved_timestamp": "1762652580.517576", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "skumar9/Llama-medx_v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "skumar9/Llama-medx_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4462337708391512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4908589512175783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36612500000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34632646276595747 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json b/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json new file mode 100644 index 000000000..12b40f159 --- /dev/null +++ b/data/hfopenllm_v2/meta/suayptalha/DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_DeepSeek-R1-Distill-Llama-3B/1762652580.543217", + "retrieved_timestamp": "1762652580.543217", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/DeepSeek-R1-Distill-Llama-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7092658590318134 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44517853159705956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20921450151057402 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33958333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29778922872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json b/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json new file mode 100644 index 000000000..07d8343cc --- /dev/null +++ b/data/hfopenllm_v2/meta/suayptalha/Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/1762652580.543882", + "retrieved_timestamp": "1762652580.543883", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", + "developer": "meta", + "inference_platform": "unknown", + "id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6340532010620709 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43549964909074995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34057291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523936170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json b/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json new file mode 100644 index 000000000..c1e5c5c72 --- /dev/null +++ b/data/hfopenllm_v2/meta/sumink/flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_flflmillama/1762652580.5473018", + "retrieved_timestamp": "1762652580.5473018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/flflmillama", + "developer": "meta", + "inference_platform": "unknown", + "id": "sumink/flflmillama" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16756317681529453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38511286094747693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35911458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20960771276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json b/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json new file mode 100644 index 000000000..f1dfe249c --- /dev/null +++ b/data/hfopenllm_v2/meta/sumink/llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_llamaft/1762652580.547796", + "retrieved_timestamp": "1762652580.547797", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/llamaft", + "developer": "meta", + "inference_platform": "unknown", + "id": "sumink/llamaft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16086871722584964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3762775648269859 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21143617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json b/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json new file mode 100644 index 000000000..d7b6ae7f1 --- /dev/null +++ b/data/hfopenllm_v2/meta/sumink/llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_llamamerge/1762652580.547998", + "retrieved_timestamp": "1762652580.547999", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/llamamerge", + "developer": "meta", + "inference_platform": "unknown", + "id": "sumink/llamamerge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26718107953563214 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46316160070587903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42397916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2589760638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json b/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json new file mode 100644 index 000000000..088c9e1af --- /dev/null +++ b/data/hfopenllm_v2/meta/tenyx/Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tenyx_Llama3-TenyxChat-70B/1762652580.5593112", + "retrieved_timestamp": "1762652580.5593119", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tenyx/Llama3-TenyxChat-70B", + "developer": "meta", + "inference_platform": "unknown", + "id": "tenyx/Llama3-TenyxChat-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8087086707713311 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6511486901811531 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23564954682779457 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5210272606382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json b/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json new file mode 100644 index 000000000..10f382f3c --- /dev/null +++ b/data/hfopenllm_v2/meta/theprint/CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-v2/1762652580.560884", + "retrieved_timestamp": "1762652580.560884", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/CleverBoi-Llama-3.1-8B-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "theprint/CleverBoi-Llama-3.1-8B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19613957632415324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46678160110644784 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37346875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31881648936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json b/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json new file mode 100644 index 000000000..3a01dfedd --- /dev/null +++ b/data/hfopenllm_v2/meta/theprint/Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_Code-Llama-Bagel-8B/1762652580.561388", + "retrieved_timestamp": "1762652580.5613928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/Code-Llama-Bagel-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "theprint/Code-Llama-Bagel-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2529676813078188 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46974200049001086 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3679791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28216422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json b/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json new file mode 100644 index 000000000..10af06d61 --- /dev/null +++ b/data/hfopenllm_v2/meta/theprint/Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_Llama-3.2-3B-VanRossum/1762652580.562204", + "retrieved_timestamp": "1762652580.562206", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/Llama-3.2-3B-VanRossum", + "developer": "meta", + "inference_platform": "unknown", + "id": "theprint/Llama-3.2-3B-VanRossum" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4782820693537591 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42787418229776697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3441666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27701130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 3.696 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json new file mode 100644 index 000000000..0466ea2e7 --- /dev/null +++ b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.1-8B-v2/1762652580.5627892", + "retrieved_timestamp": "1762652580.56279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-Llama-3.1-8B-v2", + "developer": "meta", + "inference_platform": "unknown", + "id": "theprint/ReWiz-Llama-3.1-8B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23790542427425895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46324275457450953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.381375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3310339095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json new file mode 100644 index 000000000..114ee75b2 --- /dev/null +++ b/data/hfopenllm_v2/meta/theprint/ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.2-3B/1762652580.5630422", + "retrieved_timestamp": "1762652580.563043", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-Llama-3.2-3B", + "developer": "meta", + "inference_platform": "unknown", + "id": "theprint/ReWiz-Llama-3.2-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4648931501748693 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343257577815292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1095166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.361375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28873005319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json b/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json new file mode 100644 index 000000000..95cb485e6 --- /dev/null +++ b/data/hfopenllm_v2/meta/togethercomputer/LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_LLaMA-2-7B-32K/1762652580.574694", + "retrieved_timestamp": "1762652580.5746949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/LLaMA-2-7B-32K", + "developer": "meta", + "inference_platform": "unknown", + "id": "togethercomputer/LLaMA-2-7B-32K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18649738250065384 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33995175217301715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17677859042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json b/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json new file mode 100644 index 000000000..72db8d5ce --- /dev/null +++ b/data/hfopenllm_v2/meta/trthminh1112/autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/trthminh1112_autotrain-llama32-1b-finetune/1762652580.577601", + "retrieved_timestamp": "1762652580.5776021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "trthminh1112/autotrain-llama32-1b-finetune", + "developer": "meta", + "inference_platform": "unknown", + "id": "trthminh1112/autotrain-llama32-1b-finetune" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17685518867715438 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29956269409410674 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35127083333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10987367021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json b/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json new file mode 100644 index 000000000..081b9407e --- /dev/null +++ b/data/hfopenllm_v2/meta/uukuguy/speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-codellama-34b-v2.0/1762652580.5824919", + "retrieved_timestamp": "1762652580.5824928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-codellama-34b-v2.0", + "developer": "meta", + "inference_platform": "unknown", + "id": "uukuguy/speechless-codellama-34b-v2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46042168113937687 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4813126697444618 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2692953020134229 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37870833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25423869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json b/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json new file mode 100644 index 000000000..04c80bdd2 --- /dev/null +++ b/data/hfopenllm_v2/meta/uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/1762652580.5833302", + "retrieved_timestamp": "1762652580.583331", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", + "developer": "meta", + "inference_platform": "unknown", + "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45617517076911485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48455373040676664 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4655 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25590093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json b/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json new file mode 100644 index 000000000..3b4af5454 --- /dev/null +++ b/data/hfopenllm_v2/meta/vhab10/llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vhab10_llama-3-8b-merged-linear/1762652580.5860548", + "retrieved_timestamp": "1762652580.5860548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vhab10/llama-3-8b-merged-linear", + "developer": "meta", + "inference_platform": "unknown", + "id": "vhab10/llama-3-8b-merged-linear" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5916634529714491 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49370937443498536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08157099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4190520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37042885638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json b/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json new file mode 100644 index 000000000..25522f83d --- /dev/null +++ b/data/hfopenllm_v2/meta/vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/1762652580.5867279", + "retrieved_timestamp": "1762652580.586729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5762510139762497 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5054841203275775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3097573138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.031 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json b/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json new file mode 100644 index 000000000..857f373ed --- /dev/null +++ b/data/hfopenllm_v2/meta/vicgalle/Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Humanish-RP-Llama-3.1-8B/1762652580.587956", + "retrieved_timestamp": "1762652580.587957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Humanish-RP-Llama-3.1-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "vicgalle/Humanish-RP-Llama-3.1-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6669259786256023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5100385476143247 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39520833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34765625 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json b/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json new file mode 100644 index 000000000..bd7b35765 --- /dev/null +++ b/data/hfopenllm_v2/meta/vicgalle/Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Roleplay-Llama-3-8B/1762652580.5885959", + "retrieved_timestamp": "1762652580.588597", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Roleplay-Llama-3-8B", + "developer": "meta", + "inference_platform": "unknown", + "id": "vicgalle/Roleplay-Llama-3-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7320221456845614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5012318206922323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528854166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370844414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json b/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json new file mode 100644 index 000000000..a41d7ecfa --- /dev/null +++ b/data/hfopenllm_v2/meta/viettelsecurity-ai/security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/viettelsecurity-ai_security-llama3.2-3b/1762652580.588792", + "retrieved_timestamp": "1762652580.588792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "viettelsecurity-ai/security-llama3.2-3b", + "developer": "meta", + "inference_platform": "unknown", + "id": "viettelsecurity-ai/security-llama3.2-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5908888416069362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44005776161052806 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33790625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2837433510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json b/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json new file mode 100644 index 000000000..ef5cd48c5 --- /dev/null +++ b/data/hfopenllm_v2/meta/winglian/Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/winglian_Llama-3-8b-64k-PoSE/1762652580.595902", + "retrieved_timestamp": "1762652580.595903", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "winglian/Llama-3-8b-64k-PoSE", + "developer": "meta", + "inference_platform": "unknown", + "id": "winglian/Llama-3-8b-64k-PoSE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28569085581811815 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37021796005121793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33955208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2466755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json b/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json new file mode 100644 index 000000000..d895fdc44 --- /dev/null +++ b/data/hfopenllm_v2/meta/winglian/llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/winglian_llama-3-8b-256k-PoSE/1762652580.5961442", + "retrieved_timestamp": "1762652580.596145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "winglian/llama-3-8b-256k-PoSE", + "developer": "meta", + "inference_platform": "unknown", + "id": "winglian/llama-3-8b-256k-PoSE" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2909114482905358 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3156583397739859 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33155208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1116190159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json new file mode 100644 index 000000000..72b59a848 --- /dev/null +++ b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_8B_Instruct_CoT/1762652580.5972009", + "retrieved_timestamp": "1762652580.5972018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xinchen9/Llama3.1_8B_Instruct_CoT", + "developer": "meta", + "inference_platform": "unknown", + "id": "xinchen9/Llama3.1_8B_Instruct_CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2973565694579272 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4398206147249642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43706249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2878989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json new file mode 100644 index 000000000..6f77acd2e --- /dev/null +++ b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT/1762652580.597471", + "retrieved_timestamp": "1762652580.597472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xinchen9/Llama3.1_CoT", + "developer": "meta", + "inference_platform": "unknown", + "id": "xinchen9/Llama3.1_CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22461624046419057 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43410143664277245 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43045833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2738530585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json new file mode 100644 index 000000000..9c662ccce --- /dev/null +++ b/data/hfopenllm_v2/meta/xinchen9/Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT_V1/1762652580.597682", + "retrieved_timestamp": "1762652580.597683", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xinchen9/Llama3.1_CoT_V1", + "developer": "meta", + "inference_platform": "unknown", + "id": "xinchen9/Llama3.1_CoT_V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2452991396162183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4376001847280673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45721875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2805019946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json new file mode 100644 index 000000000..8af2ccae3 --- /dev/null +++ b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/1762652580.6118348", + "retrieved_timestamp": "1762652580.6118348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", + "developer": "meta", + "inference_platform": "unknown", + "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4012708502329375 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4615794426716074 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42109375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35738031914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json new file mode 100644 index 000000000..125e0a919 --- /dev/null +++ b/data/hfopenllm_v2/meta/yuvraj17/Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/1762652580.6120949", + "retrieved_timestamp": "1762652580.612096", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", + "developer": "meta", + "inference_platform": "unknown", + "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2884878788281759 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4977912063897858 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39982291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32571476063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json b/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json new file mode 100644 index 000000000..442ed6074 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/1024m/PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/1024m_PHI-4-Hindi/1762652579.468371", + "retrieved_timestamp": "1762652579.4683719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "1024m/PHI-4-Hindi", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "1024m/PHI-4-Hindi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00816832670647216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6710015642760666 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3976510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4913541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523936170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json b/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json new file mode 100644 index 000000000..3d9dc4e91 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/BlackBeenie/Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Phi-3-14B-v0.1/1762652579.4966102", + "retrieved_timestamp": "1762652579.496611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "BlackBeenie/Neos-Phi-3-14B-v0.1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "BlackBeenie/Neos-Phi-3-14B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4022449323350931 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6211931530444463 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41254166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45636635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json b/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json new file mode 100644 index 000000000..d82241df0 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Daemontatox/Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Phi-4-COT/1762652579.5296152", + "retrieved_timestamp": "1762652579.5296159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Phi-4-COT", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Daemontatox/Phi-4-COT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17930313789633728 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6172933868833469 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2243202416918429 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.453 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.500498670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json b/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json new file mode 100644 index 000000000..b78edbc6c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Daemontatox/SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_SphinX/1762652579.531104", + "retrieved_timestamp": "1762652579.531104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/SphinX", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Daemontatox/SphinX" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5725042886208593 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5440583486084486 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3081570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44049999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43658577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json b/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json new file mode 100644 index 000000000..5994467ae --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Daemontatox/Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_Sphinx2.0/1762652579.531323", + "retrieved_timestamp": "1762652579.531324", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/Sphinx2.0", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Daemontatox/Sphinx2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7123133286346892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.647283976671531 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40181268882175225 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5183676861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json new file mode 100644 index 000000000..b82c65282 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx/1762652579.5315351", + "retrieved_timestamp": "1762652579.5315359", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/TinySphinx", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Daemontatox/TinySphinx" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2566900269063862 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33098404240871354 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33276041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1697972074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json new file mode 100644 index 000000000..b3c9637e9 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Daemontatox/TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx2.0/1762652579.531743", + "retrieved_timestamp": "1762652579.531744", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Daemontatox/TinySphinx2.0", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Daemontatox/TinySphinx2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25351733400710114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3168407073661037 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1731216755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json new file mode 100644 index 000000000..da7a6fe5a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/1762652579.53347", + "retrieved_timestamp": "1762652579.5334709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4799055395240185 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6536184886648629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47232380319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json new file mode 100644 index 000000000..9b0d63d43 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4-v2/1762652579.533969", + "retrieved_timestamp": "1762652579.533969", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-14b-phi-4-v2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-14b-phi-4-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27747266142723526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6573002324945257 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3217522658610272 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43994791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5243517287234043 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json new file mode 100644 index 000000000..076139109 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Danielbrdz/Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4/1762652579.533744", + "retrieved_timestamp": "1762652579.533744", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Danielbrdz/Barcenas-14b-phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Danielbrdz/Barcenas-14b-phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0497590836757581 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6769303819643072 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2583081570996979 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5096770833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5174534574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json b/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json new file mode 100644 index 000000000..3d283c040 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/DreadPoor/Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_Morphing-8B-Model_Stock/1762652579.577464", + "retrieved_timestamp": "1762652579.577465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/Morphing-8B-Model_Stock", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "DreadPoor/Morphing-8B-Model_Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.744536718130117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396942172954088 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4068645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38522273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json b/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json new file mode 100644 index 000000000..85eb6157f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/EpistemeAI/DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepThinkers-Phi4/1762652579.599432", + "retrieved_timestamp": "1762652579.599433", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/DeepThinkers-Phi4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "EpistemeAI/DeepThinkers-Phi4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6939786433330231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6790415739665393 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45845921450151056 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5257646276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json b/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json new file mode 100644 index 000000000..31e81d8a1 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/EpistemeAI/Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B-v1.13a-philosophers/1762652579.60018", + "retrieved_timestamp": "1762652579.600181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI/Fireball-12B-v1.13a-philosophers", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08755324760524298 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5102697700597862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4080729166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3366855053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json b/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json new file mode 100644 index 000000000..62ea445bf --- /dev/null +++ b/data/hfopenllm_v2/microsoft/EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/1762652579.612791", + "retrieved_timestamp": "1762652579.612792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5312880933700359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6177842639287514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41390625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45985704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json b/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json new file mode 100644 index 000000000..78cd7ff3f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/FINGU-AI/Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FINGU-AI_Phi-4-RRStock/1762652579.616194", + "retrieved_timestamp": "1762652579.616194", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FINGU-AI/Phi-4-RRStock", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "FINGU-AI/Phi-4-RRStock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28554125276488607 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6443442865581455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44794791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48828125 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.652 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json b/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json new file mode 100644 index 000000000..8a5207468 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/HeraiHench/Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/HeraiHench_Phi-4-slerp-ReasoningRP-14B/1762652579.639999", + "retrieved_timestamp": "1762652579.64", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15754642127333254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41957191458446336 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3116145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18999335106382978 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 9.207 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json b/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json new file mode 100644 index 000000000..e2754698a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Josephgflowers/Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/1762652579.694953", + "retrieved_timestamp": "1762652579.694954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23565694579271884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4396616219689493 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34345833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2160904255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json new file mode 100644 index 000000000..8e813b5ab --- /dev/null +++ b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3-4b/1762652579.751861", + "retrieved_timestamp": "1762652579.751862", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-phi3-4b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-phi3-4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.552520645221346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5595320442699866 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40153124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3745844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json new file mode 100644 index 000000000..13acc94bf --- /dev/null +++ b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3.5-4b/1762652579.752121", + "retrieved_timestamp": "1762652579.7521222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.1-phi3.5-4b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.1-phi3.5-4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5659095644002359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5483695590203843 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3994583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935339095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json new file mode 100644 index 000000000..45e505aaf --- /dev/null +++ b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-phi3-4b/1762652579.7536151", + "retrieved_timestamp": "1762652579.7536159", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.2-phi3-4b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.2-phi3-4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069083365470286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5529604896487258 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3975625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3813996010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json new file mode 100644 index 000000000..b321d69d5 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/MaziyarPanahi/calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-phi3-4b/1762652579.755463", + "retrieved_timestamp": "1762652579.755465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "MaziyarPanahi/calme-2.3-phi3-4b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "MaziyarPanahi/calme-2.3-phi3-4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49264507063480456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5537867816134527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1472809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3828125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json new file mode 100644 index 000000000..e660dd0ba --- /dev/null +++ b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-14b/1762652579.784184", + "retrieved_timestamp": "1762652579.7841852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/phi-4-14b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "NikolaSigmoid/phi-4-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05607898154674043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.669500080799667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2938066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5046875000000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527842420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json new file mode 100644 index 000000000..558f62c35 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-1steps/1762652579.784436", + "retrieved_timestamp": "1762652579.784437", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/phi-4-1steps", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "NikolaSigmoid/phi-4-1steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05275668559422333 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6707359457278651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2983383685800604 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40184563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5020520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.52734375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json new file mode 100644 index 000000000..ba1ea3e00 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/NikolaSigmoid/phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-300steps/1762652579.784649", + "retrieved_timestamp": "1762652579.78465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NikolaSigmoid/phi-4-300steps", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "NikolaSigmoid/phi-4-300steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05607898154674043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6701123802649077 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4052013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287566489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 14.704 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json b/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json new file mode 100644 index 000000000..c5f5a3683 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/1762652579.7955709", + "retrieved_timestamp": "1762652579.795572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5342856952885011 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35023897852759145 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3183125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1823470744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json b/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json new file mode 100644 index 000000000..5310bf94b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/NyxKrage/Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NyxKrage_Microsoft_Phi-4/1762652579.7969122", + "retrieved_timestamp": "1762652579.796913", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NyxKrage/Microsoft_Phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "NyxKrage/Microsoft_Phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0585269307659233 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6690562305322874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990936555891239 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40604026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286735372340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json b/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json new file mode 100644 index 000000000..54849eb1f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-13b/1762652580.3541", + "retrieved_timestamp": "1762652580.3541", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Orca-2-13b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Orca-2-13b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3127933882099496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48844897288396094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5129687500000001 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27493351063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json b/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json new file mode 100644 index 000000000..2ff396fb9 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-7b/1762652580.354311", + "retrieved_timestamp": "1762652580.354312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Orca-2-7b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Orca-2-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2183462102776189 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4452132267545943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5026145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23188164893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json b/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json new file mode 100644 index 000000000..bdb7600f4 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Orion-zhen/phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Orion-zhen_phi-4-abliterated/1762652579.808864", + "retrieved_timestamp": "1762652579.808865", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Orion-zhen/phi-4-abliterated", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Orion-zhen/phi-4-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05760271634817839 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6698239306664778 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3021148036253776 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40436241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.500625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5291722074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json new file mode 100644 index 000000000..18046884c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-128k-instruct/1762652580.354526", + "retrieved_timestamp": "1762652580.354527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-medium-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-medium-128k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6040029344361849 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6382322530870549 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4129479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47116023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json b/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json new file mode 100644 index 000000000..03ef8bc6c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-4k-instruct/1762652580.354986", + "retrieved_timestamp": "1762652580.35499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-medium-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-medium-4k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6422713954529538 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6412464890555547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4675864361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json new file mode 100644 index 000000000..17204a4c1 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-128k-instruct/1762652580.355347", + "retrieved_timestamp": "1762652580.3553479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-mini-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-mini-128k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5976331688807919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574531792679852 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1404833836858006 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3936875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3734208776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json new file mode 100644 index 000000000..2a21db048 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355623", + "retrieved_timestamp": "1762652580.355624", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-mini-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-mini-4k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5612884923115112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5675972626334875 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38663563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json new file mode 100644 index 000000000..a70ef8dc2 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355825", + "retrieved_timestamp": "1762652580.355826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-mini-4k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-mini-4k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547674614467391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5490718919495822 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42841666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4021775265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json b/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json new file mode 100644 index 000000000..da0bdaff4 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-128k-instruct/1762652580.356006", + "retrieved_timestamp": "1762652580.356006", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-small-128k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-small-128k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6368258443153056 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6202176778696983 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2026086956521739 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43784375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4490525265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3SmallForCausalLM", + "params_billions": 7.392 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json b/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json new file mode 100644 index 000000000..53e68f07f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-8k-instruct/1762652580.356211", + "retrieved_timestamp": "1762652580.356212", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3-small-8k-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3-small-8k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6496651107949131 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6208364880870563 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18869565217391304 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45579166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4506316489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3SmallForCausalLM", + "params_billions": 7.392 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json b/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json new file mode 100644 index 000000000..ce486cfd8 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-MoE-instruct/1762652580.356415", + "retrieved_timestamp": "1762652580.356415", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3.5-MoE-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3.5-MoE-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.692454908531585 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.640762564622586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119335347432024 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4564791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46575797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 42.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json b/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json new file mode 100644 index 000000000..2edd04663 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-mini-instruct/1762652580.356627", + "retrieved_timestamp": "1762652580.356628", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-3.5-mini-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-3.5-mini-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5774500547436359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5517785126111956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.402125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39619348404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json b/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json new file mode 100644 index 000000000..f5c81f74a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_Phi-4-mini-instruct/1762652580.356846", + "retrieved_timestamp": "1762652580.356847", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/Phi-4-mini-instruct", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/Phi-4-mini-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7377923908562614 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.568862935505404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39320146276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.836 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json b/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json new file mode 100644 index 000000000..08c7fd902 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_CoT_Phi/1762652579.820767", + "retrieved_timestamp": "1762652579.820768", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/CoT_Phi", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/CoT_Phi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6158681188136367 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6750841958594904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33081570996978854 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42435416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901097074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json new file mode 100644 index 000000000..c2e5e82d8 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Lo-Phi-14b/1762652579.825307", + "retrieved_timestamp": "1762652579.8253078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Lo-Phi-14b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/Lo-Phi-14b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4941189377518318 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6851928144814953 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42323958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369015957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json new file mode 100644 index 000000000..edd469896 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Math_Phi4_Reason/1762652579.826147", + "retrieved_timestamp": "1762652579.826147", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Math_Phi4_Reason", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/Math_Phi4_Reason" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3220111526305758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6240212275403677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32779456193353473 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4034270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5029920212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json new file mode 100644 index 000000000..3b86c3602 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill.16bit/1762652579.8283992", + "retrieved_timestamp": "1762652579.8283992", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Phi4.Turn.R1Distill.16bit", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/Phi4.Turn.R1Distill.16bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31264378515671754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6563340892011863 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39021875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5256815159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json new file mode 100644 index 000000000..32bc5ce18 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/1762652579.8286002", + "retrieved_timestamp": "1762652579.8286011", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2995296923274689 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.645570250166195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39285416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.51171875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json new file mode 100644 index 000000000..3abedb59a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4Basis-14B-sce/1762652579.828811", + "retrieved_timestamp": "1762652579.8288121", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/Phi4Basis-14B-sce", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/Phi4Basis-14B-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6501648958097848 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6909074263536413 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4788519637462236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5389793882978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json b/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json new file mode 100644 index 000000000..2845e6290 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_ThinkPhi1.1-Tensors/1762652579.831269", + "retrieved_timestamp": "1762652579.831269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/ThinkPhi1.1-Tensors", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/ThinkPhi1.1-Tensors" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3907543096761038 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6449416604455037 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4907746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json b/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json new file mode 100644 index 000000000..810b62389 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Quazim0t0/graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Quazim0t0_graphite-14b-sce/1762652579.833386", + "retrieved_timestamp": "1762652579.833387", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Quazim0t0/graphite-14b-sce", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Quazim0t0/graphite-14b-sce" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3216864585965239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6631420093244736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30060422960725075 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.398125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5280086436170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json b/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json new file mode 100644 index 000000000..eeb62ffe5 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Sakalti/Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sakalti_Phi3.5-Comets-3.8B/1762652579.858093", + "retrieved_timestamp": "1762652579.858093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sakalti/Phi3.5-Comets-3.8B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Sakalti/Phi3.5-Comets-3.8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20942876013422163 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3335116874180515 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3763541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11527593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json new file mode 100644 index 000000000..2b3d07eb1 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-Line_14B/1762652579.8832798", + "retrieved_timestamp": "1762652579.8832798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Phi-Line_14B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Phi-Line_14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6495653754260917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6154430096216078 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859516616314199 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44785416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5453789893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json new file mode 100644 index 000000000..085a12906 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/SicariusSicariiStuff/Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-lthy4/1762652579.883529", + "retrieved_timestamp": "1762652579.88353", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "SicariusSicariiStuff/Phi-lthy4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "SicariusSicariiStuff/Phi-lthy4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7679423928509688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.587935701572946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40829166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433344414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 11.933 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json new file mode 100644 index 000000000..9a7abc1fd --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Triangle104/Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi-4-AbliteratedRP/1762652579.931047", + "retrieved_timestamp": "1762652579.931048", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Phi-4-AbliteratedRP", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Triangle104/Phi-4-AbliteratedRP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49227050891634194 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6708776140201277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3074018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5098333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.530751329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json new file mode 100644 index 000000000..1feb7d2ea --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1-Ablit/1762652579.93156", + "retrieved_timestamp": "1762652579.93156", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Phi4-RP-o1-Ablit", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Triangle104/Phi4-RP-o1-Ablit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02385559205131274 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6629825730619672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47541666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5104720744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json new file mode 100644 index 000000000..445dc4b35 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Triangle104/Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1/1762652579.9312892", + "retrieved_timestamp": "1762652579.9312901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Phi4-RP-o1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Triangle104/Phi4-RP-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022007163215822904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6652563961373095 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776435045317221 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4755729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110538563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json b/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json new file mode 100644 index 000000000..ccbd2bbb3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Undi95/Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Undi95_Phi4-abliterated/1762652579.9391701", + "retrieved_timestamp": "1762652579.939171", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Undi95/Phi4-abliterated", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Undi95/Phi4-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6617552538375954 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.680902103041113 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009063444108764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4034270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.528091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json b/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json new file mode 100644 index 000000000..7891795dc --- /dev/null +++ b/data/hfopenllm_v2/microsoft/VAGOsolutions/SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Phi-3-medium/1762652579.942282", + "retrieved_timestamp": "1762652579.942282", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "VAGOsolutions/SauerkrautLM-Phi-3-medium", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4408879550703245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6432931765847228 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4845 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46650598404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json new file mode 100644 index 000000000..af4220d3f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-Megatron-Empathetic/1762652579.952935", + "retrieved_timestamp": "1762652579.952936", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Phi-4-Megatron-Empathetic", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01726086783068924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6673396558729835 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26963746223564955 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5071354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5082280585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json new file mode 100644 index 000000000..b5902a57e --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Xiaojian9992024/Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-mini-UNOFFICAL/1762652579.9531882", + "retrieved_timestamp": "1762652579.9531891", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12732106366662677 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29444372790183987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11444481382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.754 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json b/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json new file mode 100644 index 000000000..8fcf0636b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/Youlln/3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Youlln_3PRYMMAL-PHI3-3B-SLERP/1762652579.9609358", + "retrieved_timestamp": "1762652579.960937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Youlln/3PRYMMAL-PHI3-3B-SLERP", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3655500738041729 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5421833887682153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1714501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46484375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4001828457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json b/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json new file mode 100644 index 000000000..eb07f67c4 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/abideen/MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/abideen_MedPhi-4-14B-v1/1762652579.973941", + "retrieved_timestamp": "1762652579.973942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "abideen/MedPhi-4-14B-v1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "abideen/MedPhi-4-14B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6276834355066778 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6896781879584077 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2930513595166163 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4154583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338264627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json b/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json new file mode 100644 index 000000000..788cfa874 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/allknowingroger/MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_MistralPhi3-11B/1762652579.990464", + "retrieved_timestamp": "1762652579.990464", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/MistralPhi3-11B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "allknowingroger/MistralPhi3-11B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1942911474886634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6234314600705605 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4266770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.234 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json b/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json new file mode 100644 index 000000000..2ad15e6e7 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/allknowingroger/Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Phi3mash1-17B-pass/1762652579.997936", + "retrieved_timestamp": "1762652579.997937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Phi3mash1-17B-pass", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "allknowingroger/Phi3mash1-17B-pass" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18842116694814204 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6128878795560929 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.445125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45894281914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 16.687 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json b/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json new file mode 100644 index 000000000..ac036384c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/allknowingroger/ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_ROGERphi-7B-slerp/1762652580.0022678", + "retrieved_timestamp": "1762652580.002269", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/ROGERphi-7B-slerp", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "allknowingroger/ROGERphi-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3861332375873793 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5195583428468424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46853125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3052692819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json b/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json new file mode 100644 index 000000000..3635b0251 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/benhaotang/phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/benhaotang_phi4-qwq-sky-t1/1762652580.030136", + "retrieved_timestamp": "1762652580.030137", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "benhaotang/phi4-qwq-sky-t1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "benhaotang/phi4-qwq-sky-t1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04596249063595704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6710520703782934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48995833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5244348404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json new file mode 100644 index 000000000..97826a2de --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-3.5-mini-TitanFusion-0.1/1762652580.04916", + "retrieved_timestamp": "1762652580.049161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-3.5-mini-TitanFusion-0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5227950726295119 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5373733988565133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4453125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806515957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json new file mode 100644 index 000000000..2f9d6926b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v2/1762652580.050115", + "retrieved_timestamp": "1762652580.050116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Model-Stock-v2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Model-Stock-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.63752510006782 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6824667320746144 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37537764350453173 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46617708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json new file mode 100644 index 000000000..54d5a53e0 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v3/1762652580.050334", + "retrieved_timestamp": "1762652580.050335", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Model-Stock-v3", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Model-Stock-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5911636679565775 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6726298549419627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41663541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5381482712765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json new file mode 100644 index 000000000..2939f7d43 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v4/1762652580.0505521", + "retrieved_timestamp": "1762652580.050553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Model-Stock-v4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Model-Stock-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7110145524984818 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6924302574038697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4610625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5393949468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json new file mode 100644 index 000000000..49f032f10 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock/1762652580.0497222", + "retrieved_timestamp": "1762652580.049727", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Model-Stock", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Model-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6878837041272712 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6889699980822082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4297583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44413541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5368184840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json new file mode 100644 index 000000000..9baaa598c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RP-v0/1762652580.050766", + "retrieved_timestamp": "1762652580.0507672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-RP-v0", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-RP-v0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6827129793392643 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.685633603278299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33157099697885195 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41409375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5364029255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json new file mode 100644 index 000000000..0960d2f1b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RR-Shoup/1762652580.050983", + "retrieved_timestamp": "1762652580.050983", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-RR-Shoup", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-RR-Shoup" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6586579165503088 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6947025970028124 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49924471299093653 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44404166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5428856382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json new file mode 100644 index 000000000..86bbe27d3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RStock-v0.1/1762652580.051188", + "retrieved_timestamp": "1762652580.051189", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-RStock-v0.1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-RStock-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7018721436898541 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6928310064675399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3950151057401813 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45836458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5400598404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json new file mode 100644 index 000000000..886724d71 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-ReasoningRP/1762652580.05142", + "retrieved_timestamp": "1762652580.051421", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-ReasoningRP", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-ReasoningRP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6736204382150472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6922187070022994 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4569486404833837 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44909375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5420545212765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json new file mode 100644 index 000000000..1edca2f10 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Sce-exp-v0.1/1762652580.0516632", + "retrieved_timestamp": "1762652580.0516639", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Sce-exp-v0.1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Sce-exp-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6595322632836429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.694317957938629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5030211480362538 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44407291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5423038563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json new file mode 100644 index 000000000..25367b08a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-Ex/1762652580.051897", + "retrieved_timestamp": "1762652580.051897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Stock-Ex", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Stock-Ex" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6574588757829227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6864461628663387 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46236458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374833776595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json new file mode 100644 index 000000000..669047541 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-RP/1762652580.0521228", + "retrieved_timestamp": "1762652580.0521228", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Stock-RP", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Stock-RP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6399231816025922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6859633715492438 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47147916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json new file mode 100644 index 000000000..cf28bcae3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Trim-Exp1/1762652580.052348", + "retrieved_timestamp": "1762652580.052348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-4-Trim-Exp1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-4-Trim-Exp1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12192538021338936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28516626650940224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4176875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.503 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json b/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json new file mode 100644 index 000000000..167e41c3e --- /dev/null +++ b/data/hfopenllm_v2/microsoft/bunnycore/Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bunnycore_Phi-Seek-4-Sce-V1/1762652580.052572", + "retrieved_timestamp": "1762652580.052573", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bunnycore/Phi-Seek-4-Sce-V1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "bunnycore/Phi-Seek-4-Sce-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29348462080612775 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6459114889718743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json b/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json new file mode 100644 index 000000000..57636370f --- /dev/null +++ b/data/hfopenllm_v2/microsoft/carsenk/phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/carsenk_phi3.5_mini_exp_825_uncensored/1762652580.083884", + "retrieved_timestamp": "1762652580.083887", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "carsenk/phi3.5_mini_exp_825_uncensored", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "carsenk/phi3.5_mini_exp_825_uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13641360479084386 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29647345147918264 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36441666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11751994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json new file mode 100644 index 000000000..a4bc02bcd --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/1762652580.112771", + "retrieved_timestamp": "1762652580.112771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.406816136739407 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359697041031141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3119335347432024 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.300531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json new file mode 100644 index 000000000..b210c7df5 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/1762652580.113518", + "retrieved_timestamp": "1762652580.1135192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3852588908540451 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6076225600626862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1865558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45979166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518783244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.389 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json new file mode 100644 index 000000000..47b19f228 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/1762652580.113816", + "retrieved_timestamp": "1762652580.113816", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44653297694561545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5484314644603556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15181268882175228 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4348020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3966921542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json new file mode 100644 index 000000000..8ea810245 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.1142762", + "retrieved_timestamp": "1762652580.1142762", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36125369574950017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.612322545411745 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4111770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4493849734042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json new file mode 100644 index 000000000..08339c829 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.114508", + "retrieved_timestamp": "1762652580.114509", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4123614232458765 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.638289226729353 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45246010638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json new file mode 100644 index 000000000..08d71b3ac --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/1762652580.114048", + "retrieved_timestamp": "1762652580.114049", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4247762603226107 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6456739302686527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18277945619335348 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4190520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45553523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": -1.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json new file mode 100644 index 000000000..6dd0aa869 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/1762652580.115152", + "retrieved_timestamp": "1762652580.115152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3639266036339136 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6046995537773227 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16691842900302115 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43105208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4630152925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 34.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json new file mode 100644 index 000000000..fa4e293d0 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/1762652580.1153762", + "retrieved_timestamp": "1762652580.115377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4126362495955177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48125401481062013 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4642604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2820811170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json new file mode 100644 index 000000000..9e92badaa --- /dev/null +++ b/data/hfopenllm_v2/microsoft/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/1762652580.115594", + "retrieved_timestamp": "1762652580.115595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5600894515441251 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5480369183144175 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4429895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3376828457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json b/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json new file mode 100644 index 000000000..f7766aef3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/ehristoforu/phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_phi-4-25b/1762652580.144644", + "retrieved_timestamp": "1762652580.1446452", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/phi-4-25b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "ehristoforu/phi-4-25b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6483663346587056 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6907778236877188 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.452416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4207916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5350731382978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 24.883 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json b/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json new file mode 100644 index 000000000..b9bfd1744 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/ehristoforu/ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ehristoforu_ruphi-4b/1762652580.1457548", + "retrieved_timestamp": "1762652580.145756", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ehristoforu/ruphi-4b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "ehristoforu/ruphi-4b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17518185082248433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29060336568338 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35117708333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11261635638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json b/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json new file mode 100644 index 000000000..057694ee4 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/fhai50032/Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/fhai50032_Unaligned-Thinker-PHI-4/1762652580.154337", + "retrieved_timestamp": "1762652580.1543381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "fhai50032/Unaligned-Thinker-PHI-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "fhai50032/Unaligned-Thinker-PHI-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.056254072527560206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6642576780946753 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33534743202416917 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4678541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5147107712765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json b/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json new file mode 100644 index 000000000..c034de706 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/hotmailuser/Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Phi4-Slerp4-14B/1762652580.1958668", + "retrieved_timestamp": "1762652580.195868", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Phi4-Slerp4-14B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "hotmailuser/Phi4-Slerp4-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0629485321170051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6731037909447855 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39681208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5277593085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json b/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json new file mode 100644 index 000000000..357cf506b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/1762652580.32982", + "retrieved_timestamp": "1762652580.329825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20181008612703183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3281563256810973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41229166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1471908244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json b/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json new file mode 100644 index 000000000..a91093514 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/microsoft/phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_phi-1/1762652580.357049", + "retrieved_timestamp": "1762652580.3570502", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/phi-1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/phi-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20680571993421898 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31394755895837845 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35251041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11619015957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 1.418 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json b/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json new file mode 100644 index 000000000..8e9ce12cc --- /dev/null +++ b/data/hfopenllm_v2/microsoft/microsoft/phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_phi-1_5/1762652580.357298", + "retrieved_timestamp": "1762652580.357298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/phi-1_5", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/phi-1_5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2032839532440591 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33597583211996657 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34041666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16913231382978725 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 1.418 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json b/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json new file mode 100644 index 000000000..ba898b966 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/microsoft/phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_phi-2/1762652580.357496", + "retrieved_timestamp": "1762652580.357497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/phi-2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/phi-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.273875539125077 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4881208771249696 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4098958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26279920212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json b/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json new file mode 100644 index 000000000..774da0dc6 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/microsoft/phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.357901", + "retrieved_timestamp": "1762652580.357902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0585269307659233 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6690562305322874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3164652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40604026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286735372340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json b/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json new file mode 100644 index 000000000..c516533cb --- /dev/null +++ b/data/hfopenllm_v2/microsoft/microsoft/phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.3577", + "retrieved_timestamp": "1762652580.357701", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "microsoft/phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.048785001573602486 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6703464626619114 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27870090634441086 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5295046542553191 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Phi3ForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json b/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json new file mode 100644 index 000000000..9ca82ee9c --- /dev/null +++ b/data/hfopenllm_v2/microsoft/migtissera/Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/1762652580.35902", + "retrieved_timestamp": "1762652580.359021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45387682460316403 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6206613823135703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41130208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731715425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 13.96 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json b/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json new file mode 100644 index 000000000..df7850dc2 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/mkurman/phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mkurman_phi-4-MedIT-11B-exp-1/1762652580.3661451", + "retrieved_timestamp": "1762652580.366146", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mkurman/phi-4-MedIT-11B-exp-1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "mkurman/phi-4-MedIT-11B-exp-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5947607902587357 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5413943771388249 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38479166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Phi3ForCausalLM", + "params_billions": 11.514 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json b/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json new file mode 100644 index 000000000..b438137ac --- /dev/null +++ b/data/hfopenllm_v2/microsoft/mkurman/phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mkurman_phi4-MedIT-10B-o1/1762652580.366463", + "retrieved_timestamp": "1762652580.366464", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mkurman/phi4-MedIT-10B-o1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "mkurman/phi4-MedIT-10B-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34629117408476173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519820312240642 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1148036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3507313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaMedITForCausalLM", + "params_billions": 10.255 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json b/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json new file mode 100644 index 000000000..589ea2460 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/mlabonne/phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_phixtral-2x2_8/1762652580.370162", + "retrieved_timestamp": "1762652580.370163", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/phixtral-2x2_8", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "mlabonne/phixtral-2x2_8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3431184811854767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48885941873076205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3643541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550698138297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 4.458 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json new file mode 100644 index 000000000..99101a7f9 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-gsm8k-3e/1762652580.374398", + "retrieved_timestamp": "1762652580.374399", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrm8488/phi-4-14B-grpo-gsm8k-3e", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.688533092195375 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6805415739665394 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.452416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39939583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526845079787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json new file mode 100644 index 000000000..33afc68ba --- /dev/null +++ b/data/hfopenllm_v2/microsoft/mrm8488/phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-limo/1762652580.374649", + "retrieved_timestamp": "1762652580.37465", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrm8488/phi-4-14B-grpo-limo", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "mrm8488/phi-4-14B-grpo-limo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.681239112222237 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.678485424233919 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4569486404833837 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3980625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5260970744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json new file mode 100644 index 000000000..52df068ee --- /dev/null +++ b/data/hfopenllm_v2/microsoft/netcat420/MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-abliterated-phi2-merge-unretrained/1762652580.3939252", + "retrieved_timestamp": "1762652580.393926", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-abliterated-phi2-merge-unretrained", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3005037744296245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104131503721586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31834375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14777260638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json new file mode 100644 index 000000000..2d7be6f17 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V2/1762652580.3950222", + "retrieved_timestamp": "1762652580.395023", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-phigments-slerp-V2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "netcat420/MFANN-phigments-slerp-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32316032571355113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48272762171598743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40372916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2716921542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json new file mode 100644 index 000000000..c312d9693 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.2/1762652580.395236", + "retrieved_timestamp": "1762652580.395236", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-phigments-slerp-V3.2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "netcat420/MFANN-phigments-slerp-V3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35243598097492435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4808549324972969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3707708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2705285904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json new file mode 100644 index 000000000..66cdcbe30 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/netcat420/MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.3/1762652580.395446", + "retrieved_timestamp": "1762652580.395447", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-phigments-slerp-V3.3", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "netcat420/MFANN-phigments-slerp-V3.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36909732842192056 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48952950463630956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38921874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802526595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json b/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json new file mode 100644 index 000000000..5c89476bc --- /dev/null +++ b/data/hfopenllm_v2/microsoft/pankajmathur/orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_phi-4/1762652580.435327", + "retrieved_timestamp": "1762652580.435328", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7780588837617521 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6856329737542378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29531722054380666 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47030208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255152925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json new file mode 100644 index 000000000..70787926a --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Empathetic/1762652580.469516", + "retrieved_timestamp": "1762652580.469517", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-Empathetic", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-Empathetic" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.049659348306936704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6726820578371974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2620845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49913541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065658244680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json new file mode 100644 index 000000000..9fa80a65d --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Math-IO/1762652580.469801", + "retrieved_timestamp": "1762652580.469801", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-Math-IO", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-Math-IO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05897684809638426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6668255086606543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45770392749244715 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39848993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4872916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5205285904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json new file mode 100644 index 000000000..24a4000dc --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-QwQ/1762652580.470021", + "retrieved_timestamp": "1762652580.470022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-QwQ", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-QwQ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05592937849350833 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6695574237334824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45770392749244715 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4650625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5275099734042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json new file mode 100644 index 000000000..3403bd59e --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-1/1762652580.470496", + "retrieved_timestamp": "1762652580.470498", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-Super-1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-Super-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04176584795010572 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.672933647971901 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35196374622356497 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5017395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235206117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json new file mode 100644 index 000000000..e1c1eebda --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-o1/1762652580.470741", + "retrieved_timestamp": "1762652580.470741", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-Super-o1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-Super-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04176584795010572 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.672933647971901 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35196374622356497 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5017395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5235206117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json new file mode 100644 index 000000000..a8bce0272 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super/1762652580.470242", + "retrieved_timestamp": "1762652580.470242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-Super", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-Super" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04813561350549875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6720116458521787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34894259818731116 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526595744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json new file mode 100644 index 000000000..dfba4211b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-o1/1762652580.470958", + "retrieved_timestamp": "1762652580.4709592", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi-4-o1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi-4-o1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028976449154908976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6688727399756971 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3995468277945619 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49777083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173703457446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json new file mode 100644 index 000000000..77b43b1e4 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/prithivMLmods/Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi4-Super/1762652580.471183", + "retrieved_timestamp": "1762652580.4711838", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Phi4-Super", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "prithivMLmods/Phi4-Super" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04813561350549875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6720116458521787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34894259818731116 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.526595744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json b/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json new file mode 100644 index 000000000..50a25654e --- /dev/null +++ b/data/hfopenllm_v2/microsoft/rhysjones/phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rhysjones_phi-2-orange-v2/1762652580.495306", + "retrieved_timestamp": "1762652580.495307", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rhysjones/phi-2-orange-v2", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "rhysjones/phi-2-orange-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3669740732367895 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4770220109816213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3629583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25324135638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json b/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json new file mode 100644 index 000000000..1e96340db --- /dev/null +++ b/data/hfopenllm_v2/microsoft/suayptalha/Luminis-phi-4/ace18207-a255-447d-9aba-8afdee092164.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Luminis-phi-4/1762652580.544511", + "retrieved_timestamp": "1762652580.544511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Luminis-phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "suayptalha/Luminis-phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6900069593124022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6920213038130584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45715625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5423869680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json b/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json new file mode 100644 index 000000000..01c0a0d66 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/tensopolis/phi-4-tensopolis-v1/bcbdde44-0736-4162-9faf-cd9d8e89d360.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_phi-4-tensopolis-v1/1762652580.5562031", + "retrieved_timestamp": "1762652580.5562031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/phi-4-tensopolis-v1", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "tensopolis/phi-4-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6766679078179231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6871833310149728 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49395770392749244 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347315436241611 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4140625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383976063829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json b/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json new file mode 100644 index 000000000..604e87b1b --- /dev/null +++ b/data/hfopenllm_v2/microsoft/theprint/phi-3-mini-4k-python/f017d759-59fe-42a3-947d-a4b787f084d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_phi-3-mini-4k-python/1762652580.5645702", + "retrieved_timestamp": "1762652580.564571", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/phi-3-mini-4k-python", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "theprint/phi-3-mini-4k-python" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24087753826513653 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.493759004635898 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35771276595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 4.132 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json new file mode 100644 index 000000000..3a98ddf47 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/unsloth/phi-4-bnb-4bit/c8cfc527-9a58-45e7-a8e0-39caacd8bd58.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4-bnb-4bit/1762652580.579705", + "retrieved_timestamp": "1762652580.579705", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/phi-4-bnb-4bit", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "unsloth/phi-4-bnb-4bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6729710501469435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6769854242339189 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40072916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255984042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.058 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json new file mode 100644 index 000000000..5a6adb3fa --- /dev/null +++ b/data/hfopenllm_v2/microsoft/unsloth/phi-4-unsloth-bnb-4bit/3bdd8e19-fd61-4d1e-96b1-cdadd4c2d67f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4-unsloth-bnb-4bit/1762652580.579966", + "retrieved_timestamp": "1762652580.579967", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/phi-4-unsloth-bnb-4bit", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "unsloth/phi-4-unsloth-bnb-4bit" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6793906833867471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6791089896968764 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4561933534743202 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40339583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5285904255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json b/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json new file mode 100644 index 000000000..39ed36307 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/unsloth/phi-4/c6080b92-d05a-4bda-ad07-e1b59a427844.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_phi-4/1762652580.579377", + "retrieved_timestamp": "1762652580.579378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/phi-4", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "unsloth/phi-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6882083981613231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6885874406040138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41142708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5378158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json b/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json new file mode 100644 index 000000000..a2ded30b3 --- /dev/null +++ b/data/hfopenllm_v2/microsoft/uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b/49cd8aff-0c7a-4245-831a-f4fc64383b48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-mistral-dolphin-orca-platypus-samantha-7b/1762652580.583631", + "retrieved_timestamp": "1762652580.5836318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", + "developer": "microsoft", + "inference_platform": "unknown", + "id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37002154283966543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4982774952761688 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43613541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2990359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json b/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json new file mode 100644 index 000000000..5821b41cd --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Tess-3-7B-SFT/cc99f18f-e75c-4cd1-a466-ac8c54877bd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-7B-SFT/1762652580.358523", + "retrieved_timestamp": "1762652580.3585238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Tess-3-7B-SFT", + "developer": "migtissera", + "inference_platform": "unknown", + "id": "migtissera/Tess-3-7B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3946262583279033 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46073483895076217 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4112708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30335771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json new file mode 100644 index 000000000..f1d780b6f --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/7320b12a-7511-441d-9d56-f7e713af4470.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1762652580.3597598", + "retrieved_timestamp": "1762652580.359761", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Trinity-2-Codestral-22B-v0.2", + "developer": "migtissera", + "inference_platform": "unknown", + "id": "migtissera/Trinity-2-Codestral-22B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43446832183052075 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5686364683055418 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40447916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33402593085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json new file mode 100644 index 000000000..32c9e96bd --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B-v0.2/a18b3d46-7e65-4cb3-b7e5-12b86f34a572.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B-v0.2/1762652580.359978", + "retrieved_timestamp": "1762652580.359979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Trinity-2-Codestral-22B-v0.2", + "developer": "migtissera", + "inference_platform": "unknown", + "id": "migtissera/Trinity-2-Codestral-22B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44301121025545553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5706466356198404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3353557180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json new file mode 100644 index 000000000..c01e29920 --- /dev/null +++ b/data/hfopenllm_v2/migtissera/Trinity-2-Codestral-22B/e075cb71-eaae-46e0-917b-bf84482f76c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Trinity-2-Codestral-22B/1762652580.35951", + "retrieved_timestamp": "1762652580.3595111", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Trinity-2-Codestral-22B", + "developer": "migtissera", + "inference_platform": "unknown", + "id": "migtissera/Trinity-2-Codestral-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4202050559182968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5593244825460373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4110520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3307845744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json b/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json new file mode 100644 index 000000000..0904ef1e9 --- /dev/null +++ b/data/hfopenllm_v2/ministral/Ministral-3b-instruct/83b6f014-f8a0-4e69-ae60-cc3a7aeaaf1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ministral_Ministral-3b-instruct/1762652580.360654", + "retrieved_timestamp": "1762652580.360655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ministral/Ministral-3b-instruct", + "developer": "ministral", + "inference_platform": "unknown", + "id": "ministral/Ministral-3b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1357642167227401 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31918598478332383 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33825 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10929188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 3.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json b/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json new file mode 100644 index 000000000..4086c1fb5 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Corianas/Neural-Mistral-7B/4fb7a806-1176-474e-a039-b388f050cd45.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Corianas_Neural-Mistral-7B/1762652579.511706", + "retrieved_timestamp": "1762652579.5117068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Corianas/Neural-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Corianas/Neural-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5489235229191878 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428023404192858 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27376994680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json b/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json new file mode 100644 index 000000000..03d78d97a --- /dev/null +++ b/data/hfopenllm_v2/mistral/Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7/393f8623-7f38-4aaa-a460-cbdcb74c2d04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Mistral-7b-v0.3-Test-E0.7/1762652579.536513", + "retrieved_timestamp": "1762652579.536514", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123538876846767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4750220653053363 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40051041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2744348404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json b/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json new file mode 100644 index 000000000..12f16d9f8 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Dans-DiscountModels/mistral-7b-test-merged/5ba7e296-cdd3-40e8-b56f-cc44ef0c3dcb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_mistral-7b-test-merged/1762652579.536763", + "retrieved_timestamp": "1762652579.536763", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Dans-DiscountModels/mistral-7b-test-merged", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Dans-DiscountModels/mistral-7b-test-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6678003253589365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48981661658184755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29778922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json b/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json new file mode 100644 index 000000000..bdd42376c --- /dev/null +++ b/data/hfopenllm_v2/mistral/DreadPoor/felix_dies-mistral-7B-model_stock/0444a153-1852-4a0d-959e-750c933777bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DreadPoor_felix_dies-mistral-7B-model_stock/1762652579.5887182", + "retrieved_timestamp": "1762652579.5887191", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DreadPoor/felix_dies-mistral-7B-model_stock", + "developer": "mistral", + "inference_platform": "unknown", + "id": "DreadPoor/felix_dies-mistral-7B-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30077860077926566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49009180735274227 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3109208776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json b/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json new file mode 100644 index 000000000..3b508cd76 --- /dev/null +++ b/data/hfopenllm_v2/mistral/EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo/b798f31f-5fab-4f21-8689-fe832afb873b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-MathMistral-Nemo-Base-2407-v2dpo/1762652579.612103", + "retrieved_timestamp": "1762652579.612104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", + "developer": "mistral", + "inference_platform": "unknown", + "id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30972043067948596 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43276373285682107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4029583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11477726063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.58 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json new file mode 100644 index 000000000..c1ee875f9 --- /dev/null +++ b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_e2e/3ba2b06b-b44a-4ad6-bf38-f1602995c2f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_e2e/1762652579.625389", + "retrieved_timestamp": "1762652579.62539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuJhen/mistral_7b_v0.1_structedData_e2e", + "developer": "mistral", + "inference_platform": "unknown", + "id": "FuJhen/mistral_7b_v0.1_structedData_e2e" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17268403391889076 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4113914854984489 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3722916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2810837765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json new file mode 100644 index 000000000..bc8170a33 --- /dev/null +++ b/data/hfopenllm_v2/mistral/FuJhen/mistral_7b_v0.1_structedData_viggo/3008b476-f005-4672-a953-c86b29ba3ef2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/FuJhen_mistral_7b_v0.1_structedData_viggo/1762652579.625654", + "retrieved_timestamp": "1762652579.625655", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "FuJhen/mistral_7b_v0.1_structedData_viggo", + "developer": "mistral", + "inference_platform": "unknown", + "id": "FuJhen/mistral_7b_v0.1_structedData_viggo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17832905579418165 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45238634545986817 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37381250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2942154255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json b/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json new file mode 100644 index 000000000..e45b4c2a4 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Locutusque/TinyMistral-248M-v2.5/9a3f7863-0041-4473-b3f0-ad25f0d9310f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Locutusque_TinyMistral-248M-v2.5/1762652579.73623", + "retrieved_timestamp": "1762652579.7362418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Locutusque/TinyMistral-248M-v2.5", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Locutusque/TinyMistral-248M-v2.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1336409615376091 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30385761123260785 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37815624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11353058510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 0.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json b/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json new file mode 100644 index 000000000..8f2ea0d5b --- /dev/null +++ b/data/hfopenllm_v2/mistral/M4-ai/TinyMistral-248M-v3/830423e1-ec14-4477-8c82-8516bb8e954f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/M4-ai_TinyMistral-248M-v3/1762652579.742201", + "retrieved_timestamp": "1762652579.742202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "M4-ai/TinyMistral-248M-v3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "M4-ai/TinyMistral-248M-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16386631914431488 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2884549938995566 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11319813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 0.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json b/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json new file mode 100644 index 000000000..1eb4e6ff6 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/5cd26359-d15a-4d0b-92f1-c31101e7b993.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Marsouuu_MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial/1762652579.7477188", + "retrieved_timestamp": "1762652579.74772", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16973629968483622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464368053320647 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3990833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13788231382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.16 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json b/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json new file mode 100644 index 000000000..f0d1464be --- /dev/null +++ b/data/hfopenllm_v2/mistral/NousResearch/DeepHermes-3-Mistral-24B-Preview/b1f439ee-711a-41b8-b63d-dd28cb63266e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_DeepHermes-3-Mistral-24B-Preview/1762652579.78962", + "retrieved_timestamp": "1762652579.7896209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "developer": "mistral", + "inference_platform": "unknown", + "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45357761849669986 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6488196385442672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25755287009063443 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4503333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45902593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json b/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json new file mode 100644 index 000000000..9ef2e2e67 --- /dev/null +++ b/data/hfopenllm_v2/mistral/NousResearch/Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Mistral-7B/1762652579.790145", + "retrieved_timestamp": "1762652579.790146", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Hermes-2-Pro-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "NousResearch/Hermes-2-Pro-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5668337788179807 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4995435330498075 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43759375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29463098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json new file mode 100644 index 000000000..7b65b043c --- /dev/null +++ b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-128k/1762652579.793008", + "retrieved_timestamp": "1762652579.7930088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Mistral-7b-128k", + "developer": "mistral", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Mistral-7b-128k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19336693307091848 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4314467711273296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.289311835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json new file mode 100644 index 000000000..caac7fc8e --- /dev/null +++ b/data/hfopenllm_v2/mistral/NousResearch/Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-64k/1762652579.7932239", + "retrieved_timestamp": "1762652579.793225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NousResearch/Yarn-Mistral-7b-64k", + "developer": "mistral", + "inference_platform": "unknown", + "id": "NousResearch/Yarn-Mistral-7b-64k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2079548930171944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42931904551037814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41238541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2913896276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json b/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json new file mode 100644 index 000000000..72cd9669a --- /dev/null +++ b/data/hfopenllm_v2/mistral/Open-Orca/Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Open-Orca_Mistral-7B-OpenOrca/1762652579.799384", + "retrieved_timestamp": "1762652579.799385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Open-Orca/Mistral-7B-OpenOrca", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Open-Orca/Mistral-7B-OpenOrca" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4977659277384008 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4768173517353546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38578124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26529255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json b/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json new file mode 100644 index 000000000..9b8003fec --- /dev/null +++ b/data/hfopenllm_v2/mistral/PranavHarshan/LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/PranavHarshan_LaMistral-V4/1762652579.8148758", + "retrieved_timestamp": "1762652579.814877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "PranavHarshan/LaMistral-V4", + "developer": "mistral", + "inference_platform": "unknown", + "id": "PranavHarshan/LaMistral-V4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.623861354539289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5184255342586473 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3642916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35987367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json b/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json new file mode 100644 index 000000000..d47f15826 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/1762652579.817152", + "retrieved_timestamp": "1762652579.817153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37062106322335847 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36271140677296004 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4840104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2829953457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json b/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json new file mode 100644 index 000000000..a34889d72 --- /dev/null +++ b/data/hfopenllm_v2/mistral/TTTXXX01/Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/1762652579.911438", + "retrieved_timestamp": "1762652579.9114392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", + "developer": "mistral", + "inference_platform": "unknown", + "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43918912928806675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43195515014882774 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36041666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2765957446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json b/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json new file mode 100644 index 000000000..b5d6e3087 --- /dev/null +++ b/data/hfopenllm_v2/mistral/TencentARC/MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TencentARC_MetaMath-Mistral-Pro/1762652579.913366", + "retrieved_timestamp": "1762652579.913366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TencentARC/MetaMath-Mistral-Pro", + "developer": "mistral", + "inference_platform": "unknown", + "id": "TencentARC/MetaMath-Mistral-Pro" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21187670935340452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44131618555883606 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35241666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2471742021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json b/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json new file mode 100644 index 000000000..3e6e79a66 --- /dev/null +++ b/data/hfopenllm_v2/mistral/TencentARC/Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/TencentARC_Mistral_Pro_8B_v0.1/1762652579.913616", + "retrieved_timestamp": "1762652579.913617", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "TencentARC/Mistral_Pro_8B_v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "TencentARC/Mistral_Pro_8B_v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21145227995053123 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4525975968066435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42422916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2765126329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.987 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json new file mode 100644 index 000000000..919674bd8 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Redemption-Arc/1762652579.929934", + "retrieved_timestamp": "1762652579.9299352", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Mistral-Redemption-Arc", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Triangle104/Mistral-Redemption-Arc" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40289432040319684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6254876729064861 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45951041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4509640957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json new file mode 100644 index 000000000..f2c5c8570 --- /dev/null +++ b/data/hfopenllm_v2/mistral/Triangle104/Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Small-24b-Harmony/1762652579.930191", + "retrieved_timestamp": "1762652579.9301918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Triangle104/Mistral-Small-24b-Harmony", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Triangle104/Mistral-Small-24b-Harmony" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16871234989826994 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6433732705921861 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5430518617021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json new file mode 100644 index 000000000..2dde8e96c --- /dev/null +++ b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/1762652579.9377868", + "retrieved_timestamp": "1762652579.937788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5047352136774869 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4468056921650662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3991770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26953125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json new file mode 100644 index 000000000..0e74daaea --- /dev/null +++ b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/1762652579.937983", + "retrieved_timestamp": "1762652579.937984", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4445848127413041 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4465719945610438 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40854166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2677027925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json new file mode 100644 index 000000000..71d06afe8 --- /dev/null +++ b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/1762652579.938179", + "retrieved_timestamp": "1762652579.9381802", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4350678422142138 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4396587862984616 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40711458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2657912234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json new file mode 100644 index 000000000..9145ab116 --- /dev/null +++ b/data/hfopenllm_v2/mistral/UCLA-AGI/Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO/1762652579.93755", + "retrieved_timestamp": "1762652579.93755", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "UCLA-AGI/Mistral7B-PairRM-SPPO", + "developer": "mistral", + "inference_platform": "unknown", + "id": "UCLA-AGI/Mistral7B-PairRM-SPPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43549227161708715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4438979817093698 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39647916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26205119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json b/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json new file mode 100644 index 000000000..c9e703f6c --- /dev/null +++ b/data/hfopenllm_v2/mistral/Unbabel/TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Unbabel_TowerInstruct-Mistral-7B-v0.2/1762652579.938655", + "retrieved_timestamp": "1762652579.938656", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Unbabel/TowerInstruct-Mistral-7B-v0.2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2843422119975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.388195180992626 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4522291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19680851063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json new file mode 100644 index 000000000..0c48d6bea --- /dev/null +++ b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash1-7B-s/1762652579.990727", + "retrieved_timestamp": "1762652579.990727", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Mistralmash1-7B-s", + "developer": "mistral", + "inference_platform": "unknown", + "id": "allknowingroger/Mistralmash1-7B-s" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39610012544493056 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5277485757172445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3292885638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json new file mode 100644 index 000000000..5b97665dd --- /dev/null +++ b/data/hfopenllm_v2/mistral/allknowingroger/Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash2-7B-s/1762652579.991016", + "retrieved_timestamp": "1762652579.9910169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allknowingroger/Mistralmash2-7B-s", + "developer": "mistral", + "inference_platform": "unknown", + "id": "allknowingroger/Mistralmash2-7B-s" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4101883003763348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.530485814102601 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07930513595166164 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43724999999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3345246010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json new file mode 100644 index 000000000..581b8851c --- /dev/null +++ b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-24b-Sertraline-0304/1762652580.007422", + "retrieved_timestamp": "1762652580.007423", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/Mistral-Small-24b-Sertraline-0304", + "developer": "mistral", + "inference_platform": "unknown", + "id": "allura-org/Mistral-Small-24b-Sertraline-0304" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6799902037704402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6524908933699552 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22280966767371602 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35151006711409394 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5105551861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json new file mode 100644 index 000000000..114e7c24d --- /dev/null +++ b/data/hfopenllm_v2/mistral/allura-org/Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-Sisyphus-24b-2503/1762652580.007755", + "retrieved_timestamp": "1762652580.007756", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "allura-org/Mistral-Small-Sisyphus-24b-2503", + "developer": "mistral", + "inference_platform": "unknown", + "id": "allura-org/Mistral-Small-Sisyphus-24b-2503" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6848362345243952 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6269790835863639 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39768749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5127160904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json b/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json new file mode 100644 index 000000000..ec66e232f --- /dev/null +++ b/data/hfopenllm_v2/mistral/amazon/MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/amazon_MegaBeam-Mistral-7B-300k/1762652580.010282", + "retrieved_timestamp": "1762652580.010283", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "amazon/MegaBeam-Mistral-7B-300k", + "developer": "mistral", + "inference_platform": "unknown", + "id": "amazon/MegaBeam-Mistral-7B-300k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520347123410329 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4227731731112974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39799999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2549035904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json new file mode 100644 index 000000000..5d9ae8688 --- /dev/null +++ b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-2/1762652580.020659", + "retrieved_timestamp": "1762652580.020659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21792178087474567 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4422884892437673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40060416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2999501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json new file mode 100644 index 000000000..191109d27 --- /dev/null +++ b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-4/1762652580.0209029", + "retrieved_timestamp": "1762652580.0209038", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", + "developer": "mistral", + "inference_platform": "unknown", + "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2133007087860211 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35070947402846286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2310505319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json new file mode 100644 index 000000000..044ab23d2 --- /dev/null +++ b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-3-over-8/1762652580.02111", + "retrieved_timestamp": "1762652580.021111", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", + "developer": "mistral", + "inference_platform": "unknown", + "id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23942915907569692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4299940969601492 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38175000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30011635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json new file mode 100644 index 000000000..084acf68b --- /dev/null +++ b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-5-over-16/1762652580.021311", + "retrieved_timestamp": "1762652580.021312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", + "developer": "mistral", + "inference_platform": "unknown", + "id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21182684166899385 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4124151161773006 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29579454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json new file mode 100644 index 000000000..62ea06ec2 --- /dev/null +++ b/data/hfopenllm_v2/mistral/awnr/Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-7-over-16/1762652580.0215192", + "retrieved_timestamp": "1762652580.02152", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", + "developer": "mistral", + "inference_platform": "unknown", + "id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22936253584932426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43158208189876196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39520833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30302526595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json b/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json new file mode 100644 index 000000000..dc34bbeda --- /dev/null +++ b/data/hfopenllm_v2/mistral/aws-prototyping/MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/aws-prototyping_MegaBeam-Mistral-7B-512k/1762652580.0217311", + "retrieved_timestamp": "1762652580.0217311", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "aws-prototyping/MegaBeam-Mistral-7B-512k", + "developer": "mistral", + "inference_platform": "unknown", + "id": "aws-prototyping/MegaBeam-Mistral-7B-512k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5972586071623293 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3662336639946533 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3993645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25889295212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json b/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json new file mode 100644 index 000000000..6f49afe69 --- /dev/null +++ b/data/hfopenllm_v2/mistral/axolotl-ai-co/romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/1762652580.021987", + "retrieved_timestamp": "1762652580.0219882", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", + "developer": "mistral", + "inference_platform": "unknown", + "id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.607924750772395 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5395057669562011 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11404833836858005 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42330208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3469082446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json b/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json new file mode 100644 index 000000000..a278448fe --- /dev/null +++ b/data/hfopenllm_v2/mistral/bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/1762652580.026026", + "retrieved_timestamp": "1762652580.026027", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", + "developer": "mistral", + "inference_platform": "unknown", + "id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6705729686121713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5155964285724085 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13670694864048338 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4309895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36768617021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json b/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json new file mode 100644 index 000000000..5cda9878f --- /dev/null +++ b/data/hfopenllm_v2/mistral/cckm/tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/cckm_tinymistral_950m/1762652580.099487", + "retrieved_timestamp": "1762652580.099488", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "cckm/tinymistral_950m", + "developer": "mistral", + "inference_platform": "unknown", + "id": "cckm/tinymistral_950m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23952889444451833 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29694562621388126 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3553645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10962433510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 0.955 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json b/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json new file mode 100644 index 000000000..f034a6d9c --- /dev/null +++ b/data/hfopenllm_v2/mistral/chujiezheng/Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/1762652580.101214", + "retrieved_timestamp": "1762652580.101215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", + "developer": "mistral", + "inference_platform": "unknown", + "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36734863495525205 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882191262277366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40553124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2551529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json new file mode 100644 index 000000000..3a07d449f --- /dev/null +++ b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-mistral-7B/1762652580.155141", + "retrieved_timestamp": "1762652580.155141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/Mahou-1.2a-mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "flammenai/Mahou-1.2a-mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4552010886669592 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5118111474458115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38962500000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31632313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json new file mode 100644 index 000000000..5d939aa5c --- /dev/null +++ b/data/hfopenllm_v2/mistral/flammenai/Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-mistral-nemo-12B/1762652580.155725", + "retrieved_timestamp": "1762652580.1557262", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "flammenai/Mahou-1.5-mistral-nemo-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "flammenai/Mahou-1.5-mistral-nemo-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6751441730164851 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5522361927910235 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4520416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3602061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json new file mode 100644 index 000000000..5be9988a3 --- /dev/null +++ b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock-24B/1762652580.195392", + "retrieved_timestamp": "1762652580.195392", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Mistral-modelstock-24B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "hotmailuser/Mistral-modelstock-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3424192254329623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.645229041403176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41023489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4590416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5069813829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json new file mode 100644 index 000000000..d526f05e5 --- /dev/null +++ b/data/hfopenllm_v2/mistral/hotmailuser/Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock2-24B/1762652580.195659", + "retrieved_timestamp": "1762652580.19566", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "hotmailuser/Mistral-modelstock2-24B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "hotmailuser/Mistral-modelstock2-24B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43184528163051816 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6689381929188762 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24018126888217523 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46161458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5318317819148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json b/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json new file mode 100644 index 000000000..186634d98 --- /dev/null +++ b/data/hfopenllm_v2/mistral/irahulpandey/mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/irahulpandey_mistralai-7B-slerp-v0.1/1762652580.23053", + "retrieved_timestamp": "1762652580.230531", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "irahulpandey/mistralai-7B-slerp-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "irahulpandey/mistralai-7B-slerp-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4966167546554254 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5010682924547378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45497916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2951296542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json b/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json new file mode 100644 index 000000000..efbc09345 --- /dev/null +++ b/data/hfopenllm_v2/mistral/kaist-ai/mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/kaist-ai_mistral-orpo-capybara-7k/1762652580.30416", + "retrieved_timestamp": "1762652580.304161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "kaist-ai/mistral-orpo-capybara-7k", + "developer": "mistral", + "inference_platform": "unknown", + "id": "kaist-ai/mistral-orpo-capybara-7k" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.536733644507684 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4488995185492166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3963541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.297124335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json new file mode 100644 index 000000000..17cecf052 --- /dev/null +++ b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.325205", + "retrieved_timestamp": "1762652580.325206", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llmat/Mistral-v0.3-7B-ORPO", + "developer": "mistral", + "inference_platform": "unknown", + "id": "llmat/Mistral-v0.3-7B-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3639764713183243 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.400465557804411 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0015105740181268882 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3528541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23013630319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json new file mode 100644 index 000000000..660f45365 --- /dev/null +++ b/data/hfopenllm_v2/mistral/llmat/Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.324949", + "retrieved_timestamp": "1762652580.324949", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "llmat/Mistral-v0.3-7B-ORPO", + "developer": "mistral", + "inference_platform": "unknown", + "id": "llmat/Mistral-v0.3-7B-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3770406964631622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39776607302918093 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35552083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2278091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json b/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json new file mode 100644 index 000000000..2549f80d8 --- /dev/null +++ b/data/hfopenllm_v2/mistral/migtissera/Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-Mistral-Nemo-12B/1762652580.358769", + "retrieved_timestamp": "1762652580.35877", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "migtissera/Tess-3-Mistral-Nemo-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "migtissera/Tess-3-Mistral-Nemo-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.335499807178287 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.489942302453045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44578125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25648271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json b/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json new file mode 100644 index 000000000..0c1052c48 --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistral-community/Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistral-community_Mistral-7B-v0.2/1762652580.360901", + "retrieved_timestamp": "1762652580.3609018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-community/Mistral-7B-v0.2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistral-community/Mistral-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22663976028050017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4510187962797583 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2952958776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json b/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json new file mode 100644 index 000000000..f70230d3d --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistral-community/Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistral-community_Mixtral-8x22B-v0.1/1762652580.361141", + "retrieved_timestamp": "1762652580.361141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-community/Mixtral-8x22B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistral-community/Mixtral-8x22B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3166564417177914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38000000000000006 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15428571428571428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35333333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Unknown", + "params_billions": 0.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json b/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json new file mode 100644 index 000000000..389b167f7 --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistral-community/mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistral-community_mixtral-8x22B-v0.3/1762652580.361342", + "retrieved_timestamp": "1762652580.361343", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistral-community/mixtral-8x22B-v0.3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistral-community/mixtral-8x22B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25826362939223485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6250002178435845 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4036979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46392952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.63 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json b/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json new file mode 100644 index 000000000..8ef2c1deb --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Codestral-22B-v0.1/1762652580.361543", + "retrieved_timestamp": "1762652580.361544", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Codestral-22B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Codestral-22B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5771752283939946 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5139136921003167 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10045317220543806 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3155751329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json new file mode 100644 index 000000000..14e5c750f --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.1/1762652580.362653", + "retrieved_timestamp": "1762652580.362654", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-7B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mistral-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2385548123423627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4419401145517045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4139375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30127992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json new file mode 100644 index 000000000..fbb368675 --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.3/1762652580.362854", + "retrieved_timestamp": "1762652580.362854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-7B-v0.3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mistral-7B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22663976028050017 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45168546294642503 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2952958776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json new file mode 100644 index 000000000..00ffe181a --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Base-2407/1762652580.363275", + "retrieved_timestamp": "1762652580.363276", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Nemo-Base-2407", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Nemo-Base-2407" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16299197241098062 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5035062000369291 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3921354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34715757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.58 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json b/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json new file mode 100644 index 000000000..01140ebe0 --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-24B-Base-2501/1762652580.363713", + "retrieved_timestamp": "1762652580.363714", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Small-24B-Base-2501", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Small-24B-Base-2501" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16723848278124265 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6441860347172437 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971299093655589 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42366666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406416223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json new file mode 100644 index 000000000..10352b777 --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-v0.1/1762652580.364491", + "retrieved_timestamp": "1762652580.364492", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mixtral-8x22B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mixtral-8x22B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25826362939223485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6239807473187268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4036979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46392952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json new file mode 100644 index 000000000..5d812e45f --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.3651662", + "retrieved_timestamp": "1762652580.3651662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mixtral-8x7B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mixtral-8x7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23260947618984296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097711377553386 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32046979865771813 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4413125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3871343085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json new file mode 100644 index 000000000..5c64cd96d --- /dev/null +++ b/data/hfopenllm_v2/mistral/mistralai/Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.364961", + "retrieved_timestamp": "1762652580.364962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mixtral-8x7B-v0.1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "mistralai/Mixtral-8x7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24152692633324024 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.508666743762444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43213541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3849734042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json new file mode 100644 index 000000000..9b5852417 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376322", + "retrieved_timestamp": "1762652580.376323", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nazimali/Mistral-Nemo-Kurdish-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4860004787297703 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47214400722999256 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40057291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30867686170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json new file mode 100644 index 000000000..1e0cbd557 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376105", + "retrieved_timestamp": "1762652580.376106", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nazimali/Mistral-Nemo-Kurdish-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4963917959901949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4699417600389813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062666223404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json new file mode 100644 index 000000000..4a448af8c --- /dev/null +++ b/data/hfopenllm_v2/mistral/nazimali/Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish/1762652580.375733", + "retrieved_timestamp": "1762652580.3757372", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nazimali/Mistral-Nemo-Kurdish", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nazimali/Mistral-Nemo-Kurdish" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3401208792670115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5133321102266589 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09592145015105741 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4115729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3234707446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json b/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json new file mode 100644 index 000000000..f637629ba --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_BigKartoffel-mistral-nemo-20B/1762652580.376553", + "retrieved_timestamp": "1762652580.376553", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/BigKartoffel-mistral-nemo-20B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/BigKartoffel-mistral-nemo-20B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5857181168189294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.55148305168682 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42804166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3529753989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 20.427 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json b/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json new file mode 100644 index 000000000..7b0f9ef78 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/1762652580.376802", + "retrieved_timestamp": "1762652580.3768032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191480826429429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5217926041279988 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3080119680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.153 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json b/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json new file mode 100644 index 000000000..986069db9 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_DoublePotato-Mistral-Nemo-13B/1762652580.377009", + "retrieved_timestamp": "1762652580.3770099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/DoublePotato-Mistral-Nemo-13B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6796156420519777 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5437915398770364 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45997916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359624335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 13.338 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json b/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json new file mode 100644 index 000000000..9ee107416 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Flammades-Mistral-Nemo-12B/1762652580.3785129", + "retrieved_timestamp": "1762652580.3785138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Flammades-Mistral-Nemo-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Flammades-Mistral-Nemo-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38415958545548035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5299609345270283 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.480625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36610704787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json b/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json new file mode 100644 index 000000000..e91337d8b --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Gutensuppe-mistral-nemo-12B/1762652580.378963", + "retrieved_timestamp": "1762652580.378964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Gutensuppe-mistral-nemo-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Gutensuppe-mistral-nemo-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29161070404305023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5486832203098263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3680186170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json b/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json new file mode 100644 index 000000000..7d8c9c761 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/1762652580.379175", + "retrieved_timestamp": "1762652580.379176", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37214479802479644 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4981450458280896 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46230208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29928523936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json b/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json new file mode 100644 index 000000000..194fd0d2b --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/1762652580.3801112", + "retrieved_timestamp": "1762652580.380112", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34948824674086976 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5586245741555749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338926174496644 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43566666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36278257978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json b/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json new file mode 100644 index 000000000..2dd09d1d5 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/1762652580.380727", + "retrieved_timestamp": "1762652580.380728", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6824880206740338 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5496040380079439 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45216666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35738031914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json new file mode 100644 index 000000000..cf3fab77b --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/1762652580.380932", + "retrieved_timestamp": "1762652580.380933", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5716798095719358 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40762540890255944 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4059375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2728557180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json new file mode 100644 index 000000000..19af3b047 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/1762652580.3813472", + "retrieved_timestamp": "1762652580.3813481", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6535869271311232 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374496172235809 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42330208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3546376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json new file mode 100644 index 000000000..58a1c1c1e --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/1762652580.381143", + "retrieved_timestamp": "1762652580.381144", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567068711020093 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274606999473499 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41321874999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35787898936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json new file mode 100644 index 000000000..8c1a7d256 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v2/1762652580.3824818", + "retrieved_timestamp": "1762652580.382483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Prism-12B-v2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6974006746543615 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5491875637377679 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45997916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3567154255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json new file mode 100644 index 000000000..16a2168b3 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v7/1762652580.382694", + "retrieved_timestamp": "1762652580.382695", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Prism-12B-v7", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Prism-12B-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6961517662025647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5521104600038905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46388541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35904255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json new file mode 100644 index 000000000..76f532d5f --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B/1762652580.382256", + "retrieved_timestamp": "1762652580.382257", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Prism-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Prism-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6858103166265509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5475186352291487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46261458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3581283244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json new file mode 100644 index 000000000..e6b3f8b8c --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Drummer-22B/1762652580.3829079", + "retrieved_timestamp": "1762652580.3829088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Small-Drummer-22B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Small-Drummer-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6331289866443259 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5793201948136216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40636458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40949135638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json new file mode 100644 index 000000000..d0186f18f --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/1762652580.383116", + "retrieved_timestamp": "1762652580.383116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48932277468228746 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5858932329112819 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39706250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41240026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json b/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json new file mode 100644 index 000000000..b100c5d48 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Stella-mistral-nemo-12B-v2/1762652580.384186", + "retrieved_timestamp": "1762652580.384186", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Stella-mistral-nemo-12B-v2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/Stella-mistral-nemo-12B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32743121584063617 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5483750956495209 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4303958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684341755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json new file mode 100644 index 000000000..7435e83fe --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades-12B/1762652580.385997", + "retrieved_timestamp": "1762652580.385998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-bophades-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-bophades-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6794405510711579 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4988471515853883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12311178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35006648936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json new file mode 100644 index 000000000..cf69675cd --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades3-12B/1762652580.386282", + "retrieved_timestamp": "1762652580.386283", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-bophades3-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-bophades3-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6577835698169745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.544933208169299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4604479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3371010638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json new file mode 100644 index 000000000..1bbdb4318 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-cc-12B/1762652580.386496", + "retrieved_timestamp": "1762652580.386497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-cc-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-cc-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14353249378316202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399409546487519 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44236458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3597905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json new file mode 100644 index 000000000..e75800965 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutades-12B/1762652580.3867059", + "retrieved_timestamp": "1762652580.3867059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutades-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutades-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3425189608017837 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5407194259684368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3560505319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json new file mode 100644 index 000000000..c0fd12a00 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v2/1762652580.38711", + "retrieved_timestamp": "1762652580.387111", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutenberg-12B-v2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutenberg-12B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6203395878491292 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397203788283472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34990026595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json new file mode 100644 index 000000000..327f7b376 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v3/1762652580.387317", + "retrieved_timestamp": "1762652580.3873181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutenberg-12B-v3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827085466562057 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.544065799051091 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44503125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3644448138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json new file mode 100644 index 000000000..b56ed44f3 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v4/1762652580.3875241", + "retrieved_timestamp": "1762652580.387525", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutenberg-12B-v4", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.237929804031082 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5269028864823667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12613293051359517 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4104270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3575465425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json new file mode 100644 index 000000000..c3739fd8d --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B/1762652580.3869052", + "retrieved_timestamp": "1762652580.3869061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutenberg-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutenberg-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.350386973231027 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5281363707697807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41706250000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3562167553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json new file mode 100644 index 000000000..878eed44f --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg2-12B-test/1762652580.387729", + "retrieved_timestamp": "1762652580.38773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-gutenberg2-12B-test", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33847192116916447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.525477908630255 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4157291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35546875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json new file mode 100644 index 000000000..41c5ea155 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-kartoffel-12B/1762652580.3880079", + "retrieved_timestamp": "1762652580.3880079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-kartoffel-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-kartoffel-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7031709198260616 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5483796436144805 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46528125000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35846077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json new file mode 100644 index 000000000..7291c03b1 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nbeerbower/mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-narwhal-12B/1762652580.388214", + "retrieved_timestamp": "1762652580.388215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-narwhal-12B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-narwhal-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5549187267561182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057374929934754 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38469791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34832114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json new file mode 100644 index 000000000..7e5035282 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/1762652580.4083898", + "retrieved_timestamp": "1762652580.408391", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16484040124647048 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44679984097967057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3803541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2537400265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json new file mode 100644 index 000000000..a8fc22a27 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/1762652580.4086552", + "retrieved_timestamp": "1762652580.408656", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15727159492369136 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3949668154807224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3790833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1926529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json new file mode 100644 index 000000000..0104d795c --- /dev/null +++ b/data/hfopenllm_v2/mistral/nlpguy/Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/1762652580.408863", + "retrieved_timestamp": "1762652580.408864", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14120976786038822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30524522602918064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40984375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11710438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.451 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json b/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json new file mode 100644 index 000000000..fd96f7c82 --- /dev/null +++ b/data/hfopenllm_v2/mistral/nvidia/Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Base/1762652580.415714", + "retrieved_timestamp": "1762652580.415715", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Mistral-NeMo-Minitron-8B-Base", + "developer": "mistral", + "inference_platform": "unknown", + "id": "nvidia/Mistral-NeMo-Minitron-8B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19456597383830457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219098090521418 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32550335570469796 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40915625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37957114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.88 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json b/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json new file mode 100644 index 000000000..c0da68dda --- /dev/null +++ b/data/hfopenllm_v2/mistral/pszemraj/Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pszemraj_Mistral-v0.3-6B/1762652580.481565", + "retrieved_timestamp": "1762652580.481566", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pszemraj/Mistral-v0.3-6B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "pszemraj/Mistral-v0.3-6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2453744952282167 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3774050646438491 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39077083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2142619680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 5.939 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json b/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json new file mode 100644 index 000000000..daeaee03c --- /dev/null +++ b/data/hfopenllm_v2/mistral/shivam9980/mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shivam9980_mistral-7b-news-cnn-merged/1762652580.515563", + "retrieved_timestamp": "1762652580.515563", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shivam9980/mistral-7b-news-cnn-merged", + "developer": "mistral", + "inference_platform": "unknown", + "id": "shivam9980/mistral-7b-news-cnn-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4634192830578421 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3635484854246454 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45226041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28274601063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json b/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json new file mode 100644 index 000000000..fd3a22ff5 --- /dev/null +++ b/data/hfopenllm_v2/mistral/shivank21/mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shivank21_mistral_dpo_self/1762652580.5158348", + "retrieved_timestamp": "1762652580.515836", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shivank21/mistral_dpo_self", + "developer": "mistral", + "inference_platform": "unknown", + "id": "shivank21/mistral_dpo_self" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.340345837932242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3216256961597798 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2214095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "", + "params_billions": 7.913 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json new file mode 100644 index 000000000..c850a0955 --- /dev/null +++ b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT/1762652580.5171149", + "retrieved_timestamp": "1762652580.5171149", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "siqi00/Mistral-7B-DFT", + "developer": "mistral", + "inference_platform": "unknown", + "id": "siqi00/Mistral-7B-DFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5568668909604294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46648773367771273 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0377643504531722 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41911458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2962932180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json new file mode 100644 index 000000000..aff1c6591 --- /dev/null +++ b/data/hfopenllm_v2/mistral/siqi00/Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT2/1762652580.5173602", + "retrieved_timestamp": "1762652580.517361", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "siqi00/Mistral-7B-DFT2", + "developer": "mistral", + "inference_platform": "unknown", + "id": "siqi00/Mistral-7B-DFT2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5803723010501026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39683798240076246 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44007291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523936170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json b/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json new file mode 100644 index 000000000..c7943b5b6 --- /dev/null +++ b/data/hfopenllm_v2/mistral/spmurrayzzz/Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spmurrayzzz_Mistral-Syndicate-7B/1762652580.534304", + "retrieved_timestamp": "1762652580.534305", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spmurrayzzz/Mistral-Syndicate-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "spmurrayzzz/Mistral-Syndicate-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.249595517670891 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42450570755678535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.033987915407854986 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43855208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2631316489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json b/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json new file mode 100644 index 000000000..8716ed5bf --- /dev/null +++ b/data/hfopenllm_v2/mistral/teknium/CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/teknium_CollectiveCognition-v1.1-Mistral-7B/1762652580.55394", + "retrieved_timestamp": "1762652580.553941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "teknium/CollectiveCognition-v1.1-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "teknium/CollectiveCognition-v1.1-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27904626391308396 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4493426704276236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3869270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28366023936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json new file mode 100644 index 000000000..42fca7b97 --- /dev/null +++ b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2-Mistral-7B/1762652580.5544581", + "retrieved_timestamp": "1762652580.5544589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "teknium/OpenHermes-2-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "teknium/OpenHermes-2-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286151854856226 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4947516371878204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45197916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2931349734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json new file mode 100644 index 000000000..c5a1ed2e2 --- /dev/null +++ b/data/hfopenllm_v2/mistral/teknium/OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2.5-Mistral-7B/1762652580.554678", + "retrieved_timestamp": "1762652580.5546792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "teknium/OpenHermes-2.5-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "teknium/OpenHermes-2.5-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5571417173100706 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4870013259924984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4241979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3054355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json new file mode 100644 index 000000000..8755a9e8b --- /dev/null +++ b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-2501-tensopolis-v1/1762652580.555758", + "retrieved_timestamp": "1762652580.555758", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/mistral-small-2501-tensopolis-v1", + "developer": "mistral", + "inference_platform": "unknown", + "id": "tensopolis/mistral-small-2501-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7762104549262623 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6474735931872574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44410876132930516 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42797916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4464760638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json new file mode 100644 index 000000000..c4888e1ff --- /dev/null +++ b/data/hfopenllm_v2/mistral/tensopolis/mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-r1-tensopolis/1762652580.556001", + "retrieved_timestamp": "1762652580.5560021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/mistral-small-r1-tensopolis", + "developer": "mistral", + "inference_platform": "unknown", + "id": "tensopolis/mistral-small-r1-tensopolis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.462220242290456 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5435969591888976 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.290785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035073138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json b/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json new file mode 100644 index 000000000..6cdafee16 --- /dev/null +++ b/data/hfopenllm_v2/mistral/theprint/Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_Conversely-Mistral-7B/1762652580.56185", + "retrieved_timestamp": "1762652580.5618508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/Conversely-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "theprint/Conversely-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2608113139802391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672348146697077 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4188958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28257978723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.496 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json b/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json new file mode 100644 index 000000000..fb8fb64ac --- /dev/null +++ b/data/hfopenllm_v2/mistral/tianyil1/MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tianyil1_MistralForCausalLM_Cal_DPO/1762652580.566411", + "retrieved_timestamp": "1762652580.566412", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tianyil1/MistralForCausalLM_Cal_DPO", + "developer": "mistral", + "inference_platform": "unknown", + "id": "tianyil1/MistralForCausalLM_Cal_DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5327619604870633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43814239617517153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39765625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2763464095744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json b/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json new file mode 100644 index 000000000..75eb4e6b2 --- /dev/null +++ b/data/hfopenllm_v2/mistral/uukuguy/speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-code-mistral-7b-v1.0/1762652580.581523", + "retrieved_timestamp": "1762652580.581524", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-code-mistral-7b-v1.0", + "developer": "mistral", + "inference_platform": "unknown", + "id": "uukuguy/speechless-code-mistral-7b-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36652415590632853 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4571712887094195 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45017708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145777925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json b/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json new file mode 100644 index 000000000..dd973e825 --- /dev/null +++ b/data/hfopenllm_v2/mistral/vicgalle/Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mistral-Prometheus-7B/1762652580.5881548", + "retrieved_timestamp": "1762652580.5881548", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Merge-Mistral-Prometheus-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "vicgalle/Merge-Mistral-Prometheus-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48480143796238423 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.420139773821292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2716921542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json b/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json new file mode 100644 index 000000000..75b6cc60a --- /dev/null +++ b/data/hfopenllm_v2/mistral/xinchen9/Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xinchen9_Mistral-7B-CoT/1762652580.5978932", + "retrieved_timestamp": "1762652580.597894", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xinchen9/Mistral-7B-CoT", + "developer": "mistral", + "inference_platform": "unknown", + "id": "xinchen9/Mistral-7B-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2783470081605695 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38726762098069667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3994270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2283909574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json new file mode 100644 index 000000000..9e11ff27c --- /dev/null +++ b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6038961", + "retrieved_timestamp": "1762652580.603897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yam-peleg/Hebrew-Mistral-7B-200K", + "developer": "mistral", + "inference_platform": "unknown", + "id": "yam-peleg/Hebrew-Mistral-7B-200K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17698041197356346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3410500846818921 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37399999999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2529089095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json new file mode 100644 index 000000000..faa496f2f --- /dev/null +++ b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6036632", + "retrieved_timestamp": "1762652580.603664", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yam-peleg/Hebrew-Mistral-7B-200K", + "developer": "mistral", + "inference_platform": "unknown", + "id": "yam-peleg/Hebrew-Mistral-7B-200K" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1855731680829089 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149272793394017 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3764791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25731382978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json new file mode 100644 index 000000000..daea51f5d --- /dev/null +++ b/data/hfopenllm_v2/mistral/yam-peleg/Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B/1762652580.603384", + "retrieved_timestamp": "1762652580.603385", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yam-peleg/Hebrew-Mistral-7B", + "developer": "mistral", + "inference_platform": "unknown", + "id": "yam-peleg/Hebrew-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23283443485507344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43340366992362034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39765625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27800864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.504 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json b/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json new file mode 100644 index 000000000..4d94a1b4f --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Ministral-8B-Instruct-2410/1762652580.361781", + "retrieved_timestamp": "1762652580.361782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Ministral-8B-Instruct-2410", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Ministral-8B-Instruct-2410" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5896399331551394 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47616402016891385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3291223404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json new file mode 100644 index 000000000..17dea97c6 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.1/1762652580.3620229", + "retrieved_timestamp": "1762652580.3620229", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-7B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-7B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4487060998151571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33548084759810987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38476041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24143949468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json new file mode 100644 index 000000000..a3e5ab858 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.2/1762652580.362234", + "retrieved_timestamp": "1762652580.3622348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-7B-Instruct-v0.2", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5496227786717023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44597355203292793 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39660416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2716921542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json new file mode 100644 index 000000000..a14a40674 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.3/1762652580.362444", + "retrieved_timestamp": "1762652580.362445", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-7B-Instruct-v0.3", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5465254413844156 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47219631712648397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390625000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30751329787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json b/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json new file mode 100644 index 000000000..7fe0e1393 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Large-Instruct-2411/1762652580.3630579", + "retrieved_timestamp": "1762652580.363059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Large-Instruct-2411", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Large-Instruct-2411" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8400577135334246 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6746647735675069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4954682779456193 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43708053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.454 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5561835106382979 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 122.61 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json new file mode 100644 index 000000000..8f0339087 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Instruct-2407/1762652580.363499", + "retrieved_timestamp": "1762652580.363499", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Nemo-Instruct-2407", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Nemo-Instruct-2407" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6380248850826917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5036523950310812 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1268882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38999999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3517287234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json new file mode 100644 index 000000000..2dc05c6a2 --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.363916", + "retrieved_timestamp": "1762652580.363917", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Small-Instruct-2409", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Small-Instruct-2409" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.666975846310013 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5213075098146217 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36320833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39602726063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.05 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json new file mode 100644 index 000000000..af0d0073a --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.364117", + "retrieved_timestamp": "1762652580.364118", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mistral-Small-Instruct-2409", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mistral-Small-Instruct-2409" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6282829558903709 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5830283846898211 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4063333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.409906914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json new file mode 100644 index 000000000..94fa837fd --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-Instruct-v0.1/1762652580.3642921", + "retrieved_timestamp": "1762652580.3642921", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mixtral-8x22B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7183584001560305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6124924926272018 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18731117824773413 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43111458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44830452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 140.621 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json new file mode 100644 index 000000000..12d85825e --- /dev/null +++ b/data/hfopenllm_v2/mistralai/Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-Instruct-v0.1/1762652580.364703", + "retrieved_timestamp": "1762652580.364704", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "developer": "mistralai", + "inference_platform": "unknown", + "id": "mistralai/Mixtral-8x7B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5599143605633053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49623654013356494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09138972809667674 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42032291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36918218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json b/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json new file mode 100644 index 000000000..367285450 --- /dev/null +++ b/data/hfopenllm_v2/mixtao/MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mixtao_MixTAO-7Bx2-MoE-v8.1/1762652580.3653471", + "retrieved_timestamp": "1762652580.365348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mixtao/MixTAO-7Bx2-MoE-v8.1", + "developer": "mixtao", + "inference_platform": "unknown", + "id": "mixtao/MixTAO-7Bx2-MoE-v8.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41623337189767595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5189059391733521 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4463333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3123337765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json b/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json new file mode 100644 index 000000000..e0004bc69 --- /dev/null +++ b/data/hfopenllm_v2/mkxu/llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-instruct-fpo/1762652580.366677", + "retrieved_timestamp": "1762652580.366678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mkxu/llama-3-8b-instruct-fpo", + "developer": "mkxu", + "inference_platform": "unknown", + "id": "mkxu/llama-3-8b-instruct-fpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6790161216682846 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4959114413700331 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36578125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36045545212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json b/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json new file mode 100644 index 000000000..5c03aaf24 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_AlphaMonarch-7B/1762652580.367184", + "retrieved_timestamp": "1762652580.3671849", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/AlphaMonarch-7B", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/AlphaMonarch-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49394384677101205 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4625522037183211 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41213541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24725731382978725 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json b/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json new file mode 100644 index 000000000..1ebe13856 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_Beyonder-4x7B-v3/1762652580.36743", + "retrieved_timestamp": "1762652580.367431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/Beyonder-4x7B-v3", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/Beyonder-4x7B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5608385749810503 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4670522037183211 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28523489932885904 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40454166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2512466755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json new file mode 100644 index 000000000..2c05f364f --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-52B-Instruct/1762652580.3676438", + "retrieved_timestamp": "1762652580.367645", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/BigQwen2.5-52B-Instruct", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/BigQwen2.5-52B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7913480675718205 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7121004678698547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41130208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5519448138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 52.268 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json b/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json new file mode 100644 index 000000000..cf8eee269 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-Echo-47B-Instruct/1762652580.36785", + "retrieved_timestamp": "1762652580.36785", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/BigQwen2.5-Echo-47B-Instruct", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7356691356711305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6125111878044905 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4124791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4734042553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 47.392 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json new file mode 100644 index 000000000..a20078ae0 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B-abliterated/1762652580.3686998", + "retrieved_timestamp": "1762652580.3686998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/Daredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/Daredevil-8B-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44263664853699297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4254272523147253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40702083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3700964095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json b/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json new file mode 100644 index 000000000..f299341cd --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B/1762652580.368499", + "retrieved_timestamp": "1762652580.3685", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/Daredevil-8B", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/Daredevil-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45477665926408595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194408746721715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.393875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.383061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json b/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json new file mode 100644 index 000000000..22e1c8b47 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/1762652580.369122", + "retrieved_timestamp": "1762652580.369123", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7329463601023063 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48740648734902187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36488541666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3503158244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json b/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json new file mode 100644 index 000000000..70bbaabda --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralBeagle14-7B/1762652580.369343", + "retrieved_timestamp": "1762652580.369343", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/NeuralBeagle14-7B", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/NeuralBeagle14-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49351941736813876 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46278709452353844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05211480362537765 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43194791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2601396276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json new file mode 100644 index 000000000..617dc51cc --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369559", + "retrieved_timestamp": "1762652580.36956", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/NeuralDaredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/NeuralDaredevil-8B-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.756077208473517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5110566504436299 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09063444108761329 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4019375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38414228723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json new file mode 100644 index 000000000..fc07e3600 --- /dev/null +++ b/data/hfopenllm_v2/mlabonne/NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369774", + "retrieved_timestamp": "1762652580.369775", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlabonne/NeuralDaredevil-8B-abliterated", + "developer": "mlabonne", + "inference_platform": "unknown", + "id": "mlabonne/NeuralDaredevil-8B-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41623337189767595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5123964057729099 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4149583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3801529255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json b/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json new file mode 100644 index 000000000..4ea828776 --- /dev/null +++ b/data/hfopenllm_v2/mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/1762652580.3704169", + "retrieved_timestamp": "1762652580.3704178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", + "developer": "mlx-community", + "inference_platform": "unknown", + "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368983186833158 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32921013057720044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16381316489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json b/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json new file mode 100644 index 000000000..f73fe4f49 --- /dev/null +++ b/data/hfopenllm_v2/mlx-community/Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/1762652580.3707452", + "retrieved_timestamp": "1762652580.3707461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", + "developer": "mlx-community", + "inference_platform": "unknown", + "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6282829558903709 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6713272911918485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4618333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5394780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 23.572 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json new file mode 100644 index 000000000..d49c92766 --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-2x8B-v0.2/1762652580.371698", + "retrieved_timestamp": "1762652580.3716989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moeru-ai/L3.1-Moe-2x8B-v0.2", + "developer": "moeru-ai", + "inference_platform": "unknown", + "id": "moeru-ai/L3.1-Moe-2x8B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7347947889377962 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255688392585965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16993957703927492 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41985416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38580452127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json new file mode 100644 index 000000000..3595645ed --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.1/1762652580.371937", + "retrieved_timestamp": "1762652580.371938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moeru-ai/L3.1-Moe-4x8B-v0.1", + "developer": "moeru-ai", + "inference_platform": "unknown", + "id": "moeru-ai/L3.1-Moe-4x8B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433219413378724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49392781736367014 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3609166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34541223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json new file mode 100644 index 000000000..6346742cd --- /dev/null +++ b/data/hfopenllm_v2/moeru-ai/L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.2/1762652580.372139", + "retrieved_timestamp": "1762652580.37214", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "moeru-ai/L3.1-Moe-4x8B-v0.2", + "developer": "moeru-ai", + "inference_platform": "unknown", + "id": "moeru-ai/L3.1-Moe-4x8B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5406554608438943 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.446625675582615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3233958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27626329787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.942 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json b/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json new file mode 100644 index 000000000..0a138e139 --- /dev/null +++ b/data/hfopenllm_v2/monsterapi/Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/1762652580.3723478", + "retrieved_timestamp": "1762652580.3723478", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", + "developer": "monsterapi", + "inference_platform": "unknown", + "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22728914834860392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28653625778742803 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34447916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11677194148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json b/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json new file mode 100644 index 000000000..785d01b37 --- /dev/null +++ b/data/hfopenllm_v2/mosaicml/mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mosaicml_mpt-7b/1762652580.3728561", + "retrieved_timestamp": "1762652580.372857", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mosaicml/mpt-7b", + "developer": "mosaicml", + "inference_platform": "unknown", + "id": "mosaicml/mpt-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21519900530592162 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32997415960801324 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36723958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12059507978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MPTForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json b/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json new file mode 100644 index 000000000..8356faa84 --- /dev/null +++ b/data/hfopenllm_v2/mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/1762652580.373101", + "retrieved_timestamp": "1762652580.3731022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", + "developer": "mosama", + "inference_platform": "unknown", + "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2870394996387363 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41093712633583523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3211979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26512632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json b/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json new file mode 100644 index 000000000..4b39b30bd --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCogito/1762652580.373355", + "retrieved_timestamp": "1762652580.373356", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrdayl/OpenCogito", + "developer": "mrdayl", + "inference_platform": "unknown", + "id": "mrdayl/OpenCogito" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3933773498761065 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47196973414577464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42401041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3451628989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json new file mode 100644 index 000000000..75516e4cb --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r1/1762652580.3737972", + "retrieved_timestamp": "1762652580.373798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrdayl/OpenCognito-r1", + "developer": "mrdayl", + "inference_platform": "unknown", + "id": "mrdayl/OpenCognito-r1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42412687225450696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4673346036303057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1903323262839879 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42407291666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474900265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json b/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json new file mode 100644 index 000000000..7d9968dfd --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r2/1762652580.373997", + "retrieved_timestamp": "1762652580.3739982", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrdayl/OpenCognito-r2", + "developer": "mrdayl", + "inference_platform": "unknown", + "id": "mrdayl/OpenCognito-r2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3958751667797001 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46882818163435913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42016666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34616023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json b/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json new file mode 100644 index 000000000..f9199d0a4 --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito/1762652580.373594", + "retrieved_timestamp": "1762652580.373594", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrdayl/OpenCognito", + "developer": "mrdayl", + "inference_platform": "unknown", + "id": "mrdayl/OpenCognito" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40621661635571393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4705607805549634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21148036253776434 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42934374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3443317819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json b/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json new file mode 100644 index 000000000..e565eb932 --- /dev/null +++ b/data/hfopenllm_v2/mrdayl/OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/mrdayl_OpenThink/1762652580.374203", + "retrieved_timestamp": "1762652580.374204", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "mrdayl/OpenThink", + "developer": "mrdayl", + "inference_platform": "unknown", + "id": "mrdayl/OpenThink" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20540720842919008 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34597850879756104 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28851963746223563 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32888541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18500664893617022 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json b/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json new file mode 100644 index 000000000..ab416a61e --- /dev/null +++ b/data/hfopenllm_v2/natong19/Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/natong19_Mistral-Nemo-Instruct-2407-abliterated/1762652580.375077", + "retrieved_timestamp": "1762652580.375078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "natong19/Mistral-Nemo-Instruct-2407-abliterated", + "developer": "natong19", + "inference_platform": "unknown", + "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6392239258500778 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5048447739625885 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4033333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.351811835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json b/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json new file mode 100644 index 000000000..07e5087ae --- /dev/null +++ b/data/hfopenllm_v2/natong19/Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/natong19_Qwen2-7B-Instruct-abliterated/1762652580.375325", + "retrieved_timestamp": "1762652580.375325", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "natong19/Qwen2-7B-Instruct-abliterated", + "developer": "natong19", + "inference_platform": "unknown", + "id": "natong19/Qwen2-7B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5836945970026197 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5553035842403061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4034270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3842253989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json b/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json new file mode 100644 index 000000000..d5ab932e3 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Kartoffel-Deepfry-12B/1762652580.379381", + "retrieved_timestamp": "1762652580.3793821", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Kartoffel-Deepfry-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Kartoffel-Deepfry-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021620411618949 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5365374219062301 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4791666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3582114361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json new file mode 100644 index 000000000..ca3088c8e --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg-12B/1762652580.380318", + "retrieved_timestamp": "1762652580.380318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Lyra4-Gutenberg-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Lyra4-Gutenberg-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212185888996751 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538669487933139 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4037916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35713098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json new file mode 100644 index 000000000..62629fe00 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg2-12B/1762652580.380519", + "retrieved_timestamp": "1762652580.3805199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Lyra4-Gutenberg2-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Lyra4-Gutenberg2-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25851296781428834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5344527944750038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11706948640483383 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39721874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35654920212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json new file mode 100644 index 000000000..263725538 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/1762652580.3819818", + "retrieved_timestamp": "1762652580.381983", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33522498082864577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5234089179237257 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3714895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3454953457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json b/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json new file mode 100644 index 000000000..5e709cfeb --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Nemo-Loony-12B-experimental/1762652580.383332", + "retrieved_timestamp": "1762652580.383332", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Nemo-Loony-12B-experimental", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Nemo-Loony-12B-experimental" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37344357416100393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38222228797769536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1589095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json b/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json new file mode 100644 index 000000000..21ab08a7d --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_Nemoties-ChatML-12B/1762652580.383542", + "retrieved_timestamp": "1762652580.383543", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/Nemoties-ChatML-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/Nemoties-ChatML-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6381999760635115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5470252374810588 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45086458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json b/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json new file mode 100644 index 000000000..386c6543c --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_SmolNemo-12B-FFT-experimental/1762652580.383975", + "retrieved_timestamp": "1762652580.383976", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/SmolNemo-12B-FFT-experimental", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/SmolNemo-12B-FFT-experimental" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3348005514257725 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3336088810494464 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38469791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12167553191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json b/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json new file mode 100644 index 000000000..e9b160754 --- /dev/null +++ b/data/hfopenllm_v2/nbeerbower/mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-wissenschaft-12B/1762652580.388424", + "retrieved_timestamp": "1762652580.388424", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbeerbower/mistral-nemo-wissenschaft-12B", + "developer": "nbeerbower", + "inference_platform": "unknown", + "id": "nbeerbower/mistral-nemo-wissenschaft-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6520133246452745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5040306120993181 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41778125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35322473404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json b/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json new file mode 100644 index 000000000..ef4ef1865 --- /dev/null +++ b/data/hfopenllm_v2/nbrahme/IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nbrahme_IndusQ/1762652580.38863", + "retrieved_timestamp": "1762652580.388631", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nbrahme/IndusQ", + "developer": "nbrahme", + "inference_platform": "unknown", + "id": "nbrahme/IndusQ" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24397487555242311 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30624035198474986 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26510067114093966 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3366354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11203457446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 1.176 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json b/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json new file mode 100644 index 000000000..2bc5aa921 --- /dev/null +++ b/data/hfopenllm_v2/necva/replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/necva_replica-IEPile/1762652580.389119", + "retrieved_timestamp": "1762652580.38912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "necva/replica-IEPile", + "developer": "necva", + "inference_platform": "unknown", + "id": "necva/replica-IEPile" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4677910167245132 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4778579652970231 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12386706948640483 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3997604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3560505319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 4.65 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json new file mode 100644 index 000000000..5f6667883 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/1762652580.389358", + "retrieved_timestamp": "1762652580.389359", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7527050448365891 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5516128933222162 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3806646525679758 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48248958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3923703457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json new file mode 100644 index 000000000..3dd133767 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-falcon3-10b-instruct/1762652580.389616", + "retrieved_timestamp": "1762652580.389617", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.1-falcon3-10b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.1-falcon3-10b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.755152994055772 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5952883626256132 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2001510574018127 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3187919463087248 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42785416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187998670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json new file mode 100644 index 000000000..f2e67b0c2 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-qwen2.5-7b-instruct/1762652580.3898308", + "retrieved_timestamp": "1762652580.3898308", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7326715337526651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5292315105257686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3913645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42278922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json b/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json new file mode 100644 index 000000000..3fa3b5f98 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-virtuoso-small/1762652580.3900428", + "retrieved_timestamp": "1762652580.3900428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.1-virtuoso-small", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.1-virtuoso-small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7959192719761344 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6442861439957068 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43616666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5129654255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json new file mode 100644 index 000000000..5b870a8b9 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-10b-instruct/1762652580.390252", + "retrieved_timestamp": "1762652580.390252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.2-falcon3-10b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.2-falcon3-10b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7768099753099553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6204846671314362 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42813541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4354222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json new file mode 100644 index 000000000..33837f593 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-7b-instruct/1762652580.39046", + "retrieved_timestamp": "1762652580.39046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.2-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.2-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5770754930251731 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363079188886575 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2537764350453172 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44788541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3904587765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json b/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json new file mode 100644 index 000000000..eed5dd240 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.3-falcon3-7b-instruct/1762652580.390663", + "retrieved_timestamp": "1762652580.3906639", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.3-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.3-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.538793502664194 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46915625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3970246010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json b/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json new file mode 100644 index 000000000..0813ed116 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.4-falcon3-7b-instruct/1762652580.39086", + "retrieved_timestamp": "1762652580.390861", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.4-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.4-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7603735865281896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5521668757306609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3768882175226586 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49712500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40043218085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json b/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json new file mode 100644 index 000000000..1bea57d25 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.5-falcon3-7b-instruct/1762652580.391073", + "retrieved_timestamp": "1762652580.391074", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.5-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.5-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7411645544931892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5589627302276082 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37386706948640486 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48652083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3966090425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json b/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json new file mode 100644 index 000000000..38c76d178 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.6-falcon3-7b-instruct/1762652580.391277", + "retrieved_timestamp": "1762652580.391277", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/jessi-v0.6-falcon3-7b-instruct", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/jessi-v0.6-falcon3-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7401904723910335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5508818723957883 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49042708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3956948138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json b/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json new file mode 100644 index 000000000..96a5075f8 --- /dev/null +++ b/data/hfopenllm_v2/neopolita/loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/neopolita_loki-v0.1-virtuoso/1762652580.3914938", + "retrieved_timestamp": "1762652580.391495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "neopolita/loki-v0.1-virtuoso", + "developer": "neopolita", + "inference_platform": "unknown", + "id": "neopolita/loki-v0.1-virtuoso" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7819308324135517 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6467251502613163 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3391238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35067114093959734 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128823138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json b/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json new file mode 100644 index 000000000..0e61ef544 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/1762652580.3919501", + "retrieved_timestamp": "1762652580.391951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2586880587951081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30859903405301287 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11452792553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json b/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json new file mode 100644 index 000000000..a03535bc5 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN-SFT/1762652580.393719", + "retrieved_timestamp": "1762652580.3937201", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN-SFT", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36822298168858625 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.485188719488523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3725416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3336103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json b/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json new file mode 100644 index 000000000..0afd635c8 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3b/1762652580.395648", + "retrieved_timestamp": "1762652580.395648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3b", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2524435165361241 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4433128382028508 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36060416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23055186170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json new file mode 100644 index 000000000..6e09a6ebc --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.15/1762652580.3958452", + "retrieved_timestamp": "1762652580.395846", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.15", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.15" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2012105657433388 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.453931293669888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3957916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24684175531914893 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json new file mode 100644 index 000000000..d77b9e4b8 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.18/1762652580.396076", + "retrieved_timestamp": "1762652580.396081", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.18", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.18" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22064455644356973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4514366169824164 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40236458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json new file mode 100644 index 000000000..7495594ea --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.19/1762652580.396478", + "retrieved_timestamp": "1762652580.396479", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.19", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.19" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22581528123157665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515800678058734 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40239583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25199468085106386 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json new file mode 100644 index 000000000..bd3227c9e --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.20/1762652580.3967948", + "retrieved_timestamp": "1762652580.396796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.20", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21934578030736224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4493365019423472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4077291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json new file mode 100644 index 000000000..c76748100 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.21/1762652580.397045", + "retrieved_timestamp": "1762652580.397046", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.21", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.21" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1909189838517356 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44700236898039053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03172205438066465 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37594791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23927859042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json new file mode 100644 index 000000000..25235c3db --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.22/1762652580.39726", + "retrieved_timestamp": "1762652580.3972611", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.22", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.22" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1979381374752324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44851095830051274 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35213541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2517453457446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json new file mode 100644 index 000000000..dbb9416bd --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.23/1762652580.39747", + "retrieved_timestamp": "1762652580.397471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.23", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.23" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20480768804549704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44954178056127364 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3427395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2417719414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json b/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json new file mode 100644 index 000000000..0119c1fd5 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.24/1762652580.397681", + "retrieved_timestamp": "1762652580.397682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv0.24", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv0.24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2200450360598767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4407346600666096 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3520729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23520611702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json new file mode 100644 index 000000000..e88e28911 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.1/1762652580.3978848", + "retrieved_timestamp": "1762652580.397886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv1.1", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2506948230694557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3397086626022651 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3223125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11585771276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json new file mode 100644 index 000000000..b6e07831b --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.2/1762652580.3980958", + "retrieved_timestamp": "1762652580.3980958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv1.2", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2686050789682487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3659932511014956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31555208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14502992021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json new file mode 100644 index 000000000..e4b9002a7 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.3/1762652580.3983822", + "retrieved_timestamp": "1762652580.3983831", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv1.3", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25466650709007654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4456312489762861 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.329875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22755984042553193 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json b/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json new file mode 100644 index 000000000..0d34b22ce --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.4/1762652580.398614", + "retrieved_timestamp": "1762652580.3986151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANN3bv1.4", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANN3bv1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35243598097492435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4808549324972969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3707708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2705285904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.78 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json b/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json new file mode 100644 index 000000000..4f8f4bb98 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.19/1762652580.39887", + "retrieved_timestamp": "1762652580.39887", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.19", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.19" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30567449921763146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47313832038755316 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35269791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24725731382978725 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json b/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json new file mode 100644 index 000000000..69ea189bb --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.20/1762652580.399081", + "retrieved_timestamp": "1762652580.399082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.20", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.20" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34786477657061043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4574431878198548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38739583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32022938829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json b/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json new file mode 100644 index 000000000..ef8d13071 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.21/1762652580.399296", + "retrieved_timestamp": "1762652580.399297", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.21", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.21" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3233099287667832 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45763723048372523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3993333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3031083776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json b/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json new file mode 100644 index 000000000..6c2c90eb8 --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.22.1/1762652580.3995059", + "retrieved_timestamp": "1762652580.399507", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.22.1", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.22.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3089469274857378 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46608928527824584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33427526595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json b/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json new file mode 100644 index 000000000..023c8582e --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.23/1762652580.3997262", + "retrieved_timestamp": "1762652580.399727", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.23", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.23" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3127435205255389 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4898102063834755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04984894259818731 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33876329787234044 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json b/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json new file mode 100644 index 000000000..0aabbd26b --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.24/1762652580.3999438", + "retrieved_timestamp": "1762652580.3999438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.24", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.24" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162409074588758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.479027491915232 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3753958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3347739361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json b/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json new file mode 100644 index 000000000..b8e67229f --- /dev/null +++ b/data/hfopenllm_v2/netcat420/MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.25/1762652580.400151", + "retrieved_timestamp": "1762652580.400151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netcat420/MFANNv0.25", + "developer": "netcat420", + "inference_platform": "unknown", + "id": "netcat420/MFANNv0.25" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34666573580322435 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47940650861209216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36879166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33427526595744683 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json b/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json new file mode 100644 index 000000000..a34f532e2 --- /dev/null +++ b/data/hfopenllm_v2/netease-youdao/Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/netease-youdao_Confucius-o1-14B/1762652580.4025002", + "retrieved_timestamp": "1762652580.402501", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "netease-youdao/Confucius-o1-14B", + "developer": "netease-youdao", + "inference_platform": "unknown", + "id": "netease-youdao/Confucius-o1-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6378497941018719 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6299772409698484 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4312688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3649328859060403 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4338125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265126329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json new file mode 100644 index 000000000..62fd48ad5 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.1/1762652580.402741", + "retrieved_timestamp": "1762652580.402742", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-7B-v0.1", + "developer": "newsbang", + "inference_platform": "unknown", + "id": "newsbang/Homer-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6108724850064495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5601389961416444 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859516616314199 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43569791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4474734042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json b/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json new file mode 100644 index 000000000..e76baf090 --- /dev/null +++ b/data/hfopenllm_v2/newsbang/Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.2/1762652580.403213", + "retrieved_timestamp": "1762652580.4032168", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "newsbang/Homer-7B-v0.2", + "developer": "newsbang", + "inference_platform": "unknown", + "id": "newsbang/Homer-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7493827488840721 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5517330182832224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24773413897280966 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33221476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4409906914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json b/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json new file mode 100644 index 000000000..9783c6a10 --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_Delirium-v1_1030_0227/1762652580.4055", + "retrieved_timestamp": "1762652580.4055", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nhyha/N3N_Delirium-v1_1030_0227", + "developer": "nhyha", + "inference_platform": "unknown", + "id": "nhyha/N3N_Delirium-v1_1030_0227" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8022890375315275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5890686677822234 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40981249999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41497672872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json b/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json new file mode 100644 index 000000000..18b79c26b --- /dev/null +++ b/data/hfopenllm_v2/nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/1762652580.405756", + "retrieved_timestamp": "1762652580.405757", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", + "developer": "nhyha", + "inference_platform": "unknown", + "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4796063334175543 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5053741309920361 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40503125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36377992021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json b/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json new file mode 100644 index 000000000..999de38f0 --- /dev/null +++ b/data/hfopenllm_v2/nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/1762652580.406431", + "retrieved_timestamp": "1762652580.406431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", + "developer": "nhyha", + "inference_platform": "unknown", + "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5694568190179834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5558529241660143 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3542296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42506249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45420545212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json new file mode 100644 index 000000000..f511adeb6 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.8/1762652580.40752", + "retrieved_timestamp": "1762652580.407521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Lion-Lamarck-v.1.0.8", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/Lion-Lamarck-v.1.0.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45090471061228654 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5868930914775694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35822147651006714 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46434507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json new file mode 100644 index 000000000..5ca077e0e --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.9/1762652580.407768", + "retrieved_timestamp": "1762652580.4077692", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Lion-Lamarck-v.1.0.9", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/Lion-Lamarck-v.1.0.9" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34089549063152436 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5918237099420903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5641993957703928 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3901006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5299583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47041223404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json new file mode 100644 index 000000000..69884e4bd --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.1.0/1762652580.4079711", + "retrieved_timestamp": "1762652580.4079711", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Lion-Lamarck-v.1.1.0", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/Lion-Lamarck-v.1.1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657750324694034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5962460968547941 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.53253125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4630984042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json b/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json new file mode 100644 index 000000000..419da1c6c --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_Miisce-one/1762652580.4081762", + "retrieved_timestamp": "1762652580.408177", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/Miisce-one", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/Miisce-one" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6065761069517768 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6504562869685913 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4169184290030212 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48198958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412234042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json b/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json new file mode 100644 index 000000000..c19960b91 --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_StableProse/1762652580.40907", + "retrieved_timestamp": "1762652580.40907", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/StableProse", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/StableProse" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19723888172271792 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5116558625577087 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3468251329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json b/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json new file mode 100644 index 000000000..f38d0acbe --- /dev/null +++ b/data/hfopenllm_v2/nlpguy/StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nlpguy_StarFusion-alpha1/1762652580.409272", + "retrieved_timestamp": "1762652580.409272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nlpguy/StarFusion-alpha1", + "developer": "nlpguy", + "inference_platform": "unknown", + "id": "nlpguy/StarFusion-alpha1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5660092997690572 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4428694115507034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40810416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3190658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json b/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json new file mode 100644 index 000000000..94770a093 --- /dev/null +++ b/data/hfopenllm_v2/noname0202/Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_Llama-3.2-4x3B-Instruct/1762652580.409481", + "retrieved_timestamp": "1762652580.409481", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/Llama-3.2-4x3B-Instruct", + "developer": "noname0202", + "inference_platform": "unknown", + "id": "noname0202/Llama-3.2-4x3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7067181744438091 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4647311192852755 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15861027190332327 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36739583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3285405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 9.949 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json new file mode 100644 index 000000000..d94a56fcc --- /dev/null +++ b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v1/1762652580.410035", + "retrieved_timestamp": "1762652580.410036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", + "developer": "noname0202", + "inference_platform": "unknown", + "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29880494864736673 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4519290530910057 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40801041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json new file mode 100644 index 000000000..f5a4d275a --- /dev/null +++ b/data/hfopenllm_v2/noname0202/gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v2/1762652580.4102452", + "retrieved_timestamp": "1762652580.4102452", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", + "developer": "noname0202", + "inference_platform": "unknown", + "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3993470657854493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4515041184509401 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36115625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36751994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json b/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json new file mode 100644 index 000000000..185170d21 --- /dev/null +++ b/data/hfopenllm_v2/notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/1762652580.4113228", + "retrieved_timestamp": "1762652580.4113238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", + "developer": "notbdq", + "inference_platform": "unknown", + "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8413564896696322 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6198222551365405 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302114803625377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.418 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4849567819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json b/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json new file mode 100644 index 000000000..5e6e3438e --- /dev/null +++ b/data/hfopenllm_v2/nothingiisreal/L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nothingiisreal_L3.1-8B-Celeste-V1.5/1762652580.4115741", + "retrieved_timestamp": "1762652580.411575", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nothingiisreal/L3.1-8B-Celeste-V1.5", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "id": "nothingiisreal/L3.1-8B-Celeste-V1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7326715337526651 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5011796822721141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37486458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37042885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json new file mode 100644 index 000000000..f9446fa20 --- /dev/null +++ b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v2/1762652580.411832", + "retrieved_timestamp": "1762652580.411832", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nothingiisreal/MN-12B-Starcannon-v2", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "id": "nothingiisreal/MN-12B-Starcannon-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3925273828995953 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5004499888471767 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05966767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39781249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31283244680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json new file mode 100644 index 000000000..fc4c4e5ee --- /dev/null +++ b/data/hfopenllm_v2/nothingiisreal/MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v3/1762652580.412042", + "retrieved_timestamp": "1762652580.412042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nothingiisreal/MN-12B-Starcannon-v3", + "developer": "nothingiisreal", + "inference_platform": "unknown", + "id": "nothingiisreal/MN-12B-Starcannon-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38073755413414184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5170553444795719 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40463541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32646276595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json b/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json new file mode 100644 index 000000000..2e28f324e --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-1.5B/1762652580.412246", + "retrieved_timestamp": "1762652580.412247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceInstruct-1.5B", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceInstruct-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3947758613811354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3931958135346713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34600000000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2573969414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json b/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json new file mode 100644 index 000000000..8531d2f1b --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-72B/1762652580.4124959", + "retrieved_timestamp": "1762652580.4124968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceInstruct-72B", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceInstruct-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.711888899231816 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6139041785911337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6261329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3213087248322148 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42060416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48736702127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json b/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json new file mode 100644 index 000000000..001f18cf0 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-7B/1762652580.412692", + "retrieved_timestamp": "1762652580.412693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceInstruct-7B", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceInstruct-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5422290633297429 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.550118130896558 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4255 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.417719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json b/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json new file mode 100644 index 000000000..635213ef4 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-1.5B-Instruct/1762652580.412895", + "retrieved_timestamp": "1762652580.412896", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceMath-1.5B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceMath-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32123654126606294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4024301274933693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20636635638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json b/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json new file mode 100644 index 000000000..2fc5d25d0 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-Instruct/1762652580.413093", + "retrieved_timestamp": "1762652580.4130938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceMath-72B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceMath-72B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.494993284485166 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.640215611099268 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7145015105740181 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40615625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44107380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json b/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json new file mode 100644 index 000000000..3531e45ca --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-RM/1762652580.413297", + "retrieved_timestamp": "1762652580.413298", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceMath-72B-RM", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceMath-72B-RM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14125963554479892 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2717426350897727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23406040268456377 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11785239361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForSequenceClassification", + "params_billions": 71.461 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json b/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json new file mode 100644 index 000000000..3ed0e275a --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-Instruct/1762652580.413503", + "retrieved_timestamp": "1762652580.413504", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceMath-7B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceMath-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45317756885064964 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49938547326244365 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6336858006042296 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4192708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33834773936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json b/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json new file mode 100644 index 000000000..7dfd64f2b --- /dev/null +++ b/data/hfopenllm_v2/nvidia/AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-RM/1762652580.4138508", + "retrieved_timestamp": "1762652580.413853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/AceMath-7B-RM", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/AceMath-7B-RM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14937809456686035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2422689292768334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35800000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11386303191489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForSequenceClassification", + "params_billions": 7.071 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json new file mode 100644 index 000000000..1f1463493 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Base/1762652580.4142", + "retrieved_timestamp": "1762652580.4142022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Hymba-1.5B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Hymba-1.5B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2295121389025563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32564785214182224 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19223736702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "HymbaForCausalLM", + "params_billions": 1.523 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json new file mode 100644 index 000000000..fbab24f93 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Instruct/1762652580.414529", + "retrieved_timestamp": "1762652580.41453", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Hymba-1.5B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Hymba-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6009055971488984 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3067133908231881 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33158333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20403922872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "HymbaForCausalLM", + "params_billions": 1.523 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json b/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json new file mode 100644 index 000000000..6de7c4561 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/1762652580.415039", + "retrieved_timestamp": "1762652580.41504", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7380672172059026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6316000668895038 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42673716012084595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4327604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49185505319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json b/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json new file mode 100644 index 000000000..e74b74014 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Minitron-4B-Base/1762652580.415251", + "retrieved_timestamp": "1762652580.415252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Minitron-4B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Minitron-4B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2217937295265451 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4083876243992497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.413375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.261968085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json b/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json new file mode 100644 index 000000000..2527591f6 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Minitron-8B-Base/1762652580.415456", + "retrieved_timestamp": "1762652580.415456", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Minitron-8B-Base", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Minitron-8B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24242676099416216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43950631883576047 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40255208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31806848404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 7.22 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json new file mode 100644 index 000000000..c7ec6c0a2 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Instruct/1762652580.415967", + "retrieved_timestamp": "1762652580.415968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5003889679384035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5320919605840294 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38857291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39910239361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json b/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json new file mode 100644 index 000000000..66614dbb7 --- /dev/null +++ b/data/hfopenllm_v2/nvidia/Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nvidia_Nemotron-Mini-4B-Instruct/1762652580.41618", + "retrieved_timestamp": "1762652580.416181", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nvidia/Nemotron-Mini-4B-Instruct", + "developer": "nvidia", + "inference_platform": "unknown", + "id": "nvidia/Nemotron-Mini-4B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6668761109411916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3864840798591535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3767291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26263297872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "NemotronForCausalLM", + "params_billions": 4.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json b/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json new file mode 100644 index 000000000..da6aaea8d --- /dev/null +++ b/data/hfopenllm_v2/nxmwxm/Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/nxmwxm_Beast-Soul-new/1762652580.416598", + "retrieved_timestamp": "1762652580.416599", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "nxmwxm/Beast-Soul-new", + "developer": "nxmwxm", + "inference_platform": "unknown", + "id": "nxmwxm/Beast-Soul-new" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48687482546310457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5227143628884523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4459270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3101728723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json b/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json new file mode 100644 index 000000000..4bdcf1e8c --- /dev/null +++ b/data/hfopenllm_v2/occiglot/occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/occiglot_occiglot-7b-es-en-instruct/1762652580.416852", + "retrieved_timestamp": "1762652580.416853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "occiglot/occiglot-7b-es-en-instruct", + "developer": "occiglot", + "inference_platform": "unknown", + "id": "occiglot/occiglot-7b-es-en-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485141646387142 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4110970229781084 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2310505319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json b/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json new file mode 100644 index 000000000..12a3cf0e4 --- /dev/null +++ b/data/hfopenllm_v2/odyssey-labs/Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/odyssey-labs_Astral-1-10B/1762652580.417092", + "retrieved_timestamp": "1762652580.417093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "odyssey-labs/Astral-1-10B", + "developer": "odyssey-labs", + "inference_platform": "unknown", + "id": "odyssey-labs/Astral-1-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38780657544204933 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4872563924334199 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42797916666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29853723404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json b/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json new file mode 100644 index 000000000..ac49b94ad --- /dev/null +++ b/data/hfopenllm_v2/olabs-ai/reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/olabs-ai_reflection_model/1762652580.417324", + "retrieved_timestamp": "1762652580.417325", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "olabs-ai/reflection_model", + "developer": "olabs-ai", + "inference_platform": "unknown", + "id": "olabs-ai/reflection_model" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15986914719610634 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4712508645838735 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35083333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33111702127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 9.3 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json new file mode 100644 index 000000000..b0ee96411 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/1762652580.418057", + "retrieved_timestamp": "1762652580.418057", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.152475431854147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3123669789182832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23154362416107382 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10995678191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json new file mode 100644 index 000000000..969e9d62e --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam/1762652580.4178078", + "retrieved_timestamp": "1762652580.4178078", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/RedPajama-3B-v1-AutoRedteam", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/RedPajama-3B-v1-AutoRedteam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13434021729012352 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30256825198631376 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2424496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36606249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1107878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json b/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json new file mode 100644 index 000000000..29c46b62f --- /dev/null +++ b/data/hfopenllm_v2/ontocord/RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/1762652580.4182642", + "retrieved_timestamp": "1762652580.418265", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2847666414003732 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30927408550278385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11070478723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 2.776 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json new file mode 100644 index 000000000..f590427b2 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8-stack_2x/1762652580.418678", + "retrieved_timestamp": "1762652580.418679", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/merged_0.2_expert_0.8-stack_2x", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/merged_0.2_expert_0.8-stack_2x" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17960345217356613 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30061312694162695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11028922872340426 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 6.512 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json new file mode 100644 index 000000000..28b2ad531 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8/1762652580.418474", + "retrieved_timestamp": "1762652580.418475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/merged_0.2_expert_0.8", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/merged_0.2_expert_0.8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17425763640473943 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3046000784127159 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36206249999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json b/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json new file mode 100644 index 000000000..d7bf52dac --- /dev/null +++ b/data/hfopenllm_v2/ontocord/merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_merged_0.5_expert_0.5/1762652580.418875", + "retrieved_timestamp": "1762652580.418876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/merged_0.5_expert_0.5", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/merged_0.5_expert_0.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1787291054402319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3017011118802398 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35424999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1107878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json b/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json new file mode 100644 index 000000000..8a1a8d0ed --- /dev/null +++ b/data/hfopenllm_v2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/1762652580.419096", + "retrieved_timestamp": "1762652580.419096", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13184240038652995 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3004467893724157 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36311458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json new file mode 100644 index 000000000..8e3b9cf0d --- /dev/null +++ b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1-instruct/1762652580.4195461", + "retrieved_timestamp": "1762652580.4195468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15302508455342934 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2853913447506418 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json new file mode 100644 index 000000000..d19652512 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1/1762652580.41932", + "retrieved_timestamp": "1762652580.419321", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/ontocord_wide_7b-stacked-stage1", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/ontocord_wide_7b-stacked-stage1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14845388014911545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28965200351622594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3603541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11053856382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json b/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json new file mode 100644 index 000000000..9a2a210da --- /dev/null +++ b/data/hfopenllm_v2/ontocord/starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_starcoder2-29b-ls/1762652580.419764", + "retrieved_timestamp": "1762652580.419765", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/starcoder2-29b-ls", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/starcoder2-29b-ls" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21492417895628046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37349755200329665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0188821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36999999999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1869182180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 29.009 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json b/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json new file mode 100644 index 000000000..5ad2b43b6 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_starcoder2_3b-AutoRedteam/1762652580.419971", + "retrieved_timestamp": "1762652580.4199722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/starcoder2_3b-AutoRedteam", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/starcoder2_3b-AutoRedteam" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15737133029251277 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3497644619743598 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3645729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13364361702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Starcoder2ForCausalLM", + "params_billions": 3.181 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json b/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json new file mode 100644 index 000000000..b347bb893 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-merge_test/1762652580.420181", + "retrieved_timestamp": "1762652580.420182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b-merge_test", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b-merge_test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17628115622104903 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011467446788138 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.342 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10663231382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json b/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json new file mode 100644 index 000000000..9640db683 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/1762652580.420386", + "retrieved_timestamp": "1762652580.420386", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13946107439371977 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30036095049490824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36320833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11402925531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json new file mode 100644 index 000000000..451e227e1 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/1762652580.420605", + "retrieved_timestamp": "1762652580.420605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16636413604790845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30309127879396963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3845416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json new file mode 100644 index 000000000..e7f67f0f4 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/1762652580.420933", + "retrieved_timestamp": "1762652580.420937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16973629968483622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2975125970659158 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37781249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1124501329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json new file mode 100644 index 000000000..d646a2a44 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/1762652580.4213939", + "retrieved_timestamp": "1762652580.4213948", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14800396281865452 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30953444521357315 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1107878989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json new file mode 100644 index 000000000..d9fa0a933 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/1762652580.4216871", + "retrieved_timestamp": "1762652580.421689", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12367407368005781 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3060091508023586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3672708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json new file mode 100644 index 000000000..8921c9757 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/1762652580.42205", + "retrieved_timestamp": "1762652580.422051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1191527369601546 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2955590587949957 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35530208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11826795212765957 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json new file mode 100644 index 000000000..47e092745 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.42265", + "retrieved_timestamp": "1762652580.4226508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1161551350416894 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3184343946486203 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34469791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11236702127659574 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json new file mode 100644 index 000000000..f966bffb8 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.422383", + "retrieved_timestamp": "1762652580.422384", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1128328390891723 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3171441625189962 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26845637583892623 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11294880319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json new file mode 100644 index 000000000..15779d14a --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/1762652580.422879", + "retrieved_timestamp": "1762652580.42288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13169279733329786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30640062669813056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34460416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11444481382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json new file mode 100644 index 000000000..af0e3e5f5 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/1762652580.4231439", + "retrieved_timestamp": "1762652580.423145", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.118178654857999 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3037498354512724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35669791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11619015957446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json new file mode 100644 index 000000000..9cf24a0d6 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/1762652580.423407", + "retrieved_timestamp": "1762652580.423407", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12399876771410967 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30324371251012056 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34869791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11278257978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json new file mode 100644 index 000000000..e4842a9e7 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/1762652580.423659", + "retrieved_timestamp": "1762652580.42366", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12981888057022034 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30518984588252307 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39276041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json new file mode 100644 index 000000000..7b47d45ea --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/1762652580.4239051", + "retrieved_timestamp": "1762652580.4239051", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20490742341431845 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911778102988436 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11668882978723404 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json new file mode 100644 index 000000000..982ae3a2f --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/1762652580.42415", + "retrieved_timestamp": "1762652580.424151", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.146105666298754 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29981162881428614 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39257291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1141123670212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json new file mode 100644 index 000000000..fadef04b9 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/1762652580.424435", + "retrieved_timestamp": "1762652580.424437", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14872870649875664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3068950688059236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34739583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11461103723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json new file mode 100644 index 000000000..920dde00e --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/1762652580.424736", + "retrieved_timestamp": "1762652580.424736", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12454842041339201 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3047398483929371 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36581250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11527593085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json new file mode 100644 index 000000000..eb8784fd7 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/1762652580.42496", + "retrieved_timestamp": "1762652580.424961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19151850423542865 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3059577262726771 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37003125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10920877659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json new file mode 100644 index 000000000..20a1a8146 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/1762652580.425178", + "retrieved_timestamp": "1762652580.4251788", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16581448334862608 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2925879483112595 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01661631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36209375000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json new file mode 100644 index 000000000..97a90f9a5 --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/1762652580.425399", + "retrieved_timestamp": "1762652580.4254", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1733832896714052 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2979956844198214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35685416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11402925531914894 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.759 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json b/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json new file mode 100644 index 000000000..52786333c --- /dev/null +++ b/data/hfopenllm_v2/ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/1762652580.425614", + "retrieved_timestamp": "1762652580.425615", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", + "developer": "ontocord", + "inference_platform": "unknown", + "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12439881736015992 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30264484636677236 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11145279255319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.888 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json b/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json new file mode 100644 index 000000000..481a5d6ea --- /dev/null +++ b/data/hfopenllm_v2/oobabooga/CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oobabooga_CodeBooga-34B-v0.1/1762652580.425838", + "retrieved_timestamp": "1762652580.425838", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oobabooga/CodeBooga-34B-v0.1", + "developer": "oobabooga", + "inference_platform": "unknown", + "id": "oobabooga/CodeBooga-34B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250180631834643 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3427441185661722 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43102083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23595412234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 33.744 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json new file mode 100644 index 000000000..b1c619cca --- /dev/null +++ b/data/hfopenllm_v2/open-atlas/Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-1.5B-Preview/1762652580.4281778", + "retrieved_timestamp": "1762652580.4281778", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "open-atlas/Atlas-Flash-1.5B-Preview", + "developer": "open-atlas", + "inference_platform": "unknown", + "id": "open-atlas/Atlas-Flash-1.5B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3269569187533522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3215460102660847 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2212990936555891 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34879166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13738364361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json b/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json new file mode 100644 index 000000000..a4d6e6aee --- /dev/null +++ b/data/hfopenllm_v2/open-atlas/Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-7B-Preview/1762652580.428412", + "retrieved_timestamp": "1762652580.428413", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "open-atlas/Atlas-Flash-7B-Preview", + "developer": "open-atlas", + "inference_platform": "unknown", + "id": "open-atlas/Atlas-Flash-7B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3907543096761038 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3541994356643969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25755287009063443 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38358333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27842420212765956 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json b/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json new file mode 100644 index 000000000..a029bdd83 --- /dev/null +++ b/data/hfopenllm_v2/open-neo/Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-3B/1762652580.428618", + "retrieved_timestamp": "1762652580.428618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "open-neo/Kyro-n1-3B", + "developer": "open-neo", + "inference_platform": "unknown", + "id": "open-neo/Kyro-n1-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45949746672163194 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46853756471175373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40879166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34225398936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json b/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json new file mode 100644 index 000000000..12764a765 --- /dev/null +++ b/data/hfopenllm_v2/open-neo/Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-7B/1762652580.42885", + "retrieved_timestamp": "1762652580.42885", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "open-neo/Kyro-n1-7B", + "developer": "open-neo", + "inference_platform": "unknown", + "id": "open-neo/Kyro-n1-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5572669406064796 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386561160683788 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38972809667673713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38841666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433344414893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json b/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json new file mode 100644 index 000000000..2bdfc9c48 --- /dev/null +++ b/data/hfopenllm_v2/open-thoughts/OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/open-thoughts_OpenThinker-7B/1762652580.4290519", + "retrieved_timestamp": "1762652580.4290528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "open-thoughts/OpenThinker-7B", + "developer": "open-thoughts", + "inference_platform": "unknown", + "id": "open-thoughts/OpenThinker-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4088895242401273 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5342727589615611 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259818731117825 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38199999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41647273936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json b/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json new file mode 100644 index 000000000..2e60ba5c5 --- /dev/null +++ b/data/hfopenllm_v2/openai/AI-Sweden-Models/gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_gpt-sw3-40b/1762652579.475041", + "retrieved_timestamp": "1762652579.475042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "AI-Sweden-Models/gpt-sw3-40b", + "developer": "openai", + "inference_platform": "unknown", + "id": "AI-Sweden-Models/gpt-sw3-40b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1470298807164989 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3267744702957652 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36323958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12757646276595744 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 39.927 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json b/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json new file mode 100644 index 000000000..bfb3a6af7 --- /dev/null +++ b/data/hfopenllm_v2/openai/DeepAutoAI/causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_causal_gpt2/1762652579.548641", + "retrieved_timestamp": "1762652579.5486422", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/causal_gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "DeepAutoAI/causal_gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1812767900282362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30257073962835446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42695833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json new file mode 100644 index 000000000..e49ad022f --- /dev/null +++ b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2/1762652579.549271", + "retrieved_timestamp": "1762652579.549272", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/d2nwg_causal_gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "DeepAutoAI/d2nwg_causal_gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19161823960425006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30268984588252307 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42971875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11510970744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json new file mode 100644 index 000000000..61b74cf1f --- /dev/null +++ b/data/hfopenllm_v2/openai/DeepAutoAI/d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2_v1/1762652579.549553", + "retrieved_timestamp": "1762652579.5495539", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "DeepAutoAI/d2nwg_causal_gpt2_v1", + "developer": "openai", + "inference_platform": "unknown", + "id": "DeepAutoAI/d2nwg_causal_gpt2_v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1988623518929773 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29918984588252306 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4336875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11353058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json new file mode 100644 index 000000000..47449da61 --- /dev/null +++ b/data/hfopenllm_v2/openai/EleutherAI/gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-j-6b/1762652579.5928068", + "retrieved_timestamp": "1762652579.592808", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/gpt-j-6b", + "developer": "openai", + "inference_platform": "unknown", + "id": "EleutherAI/gpt-j-6b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2522185578708937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3191044431037278 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12408577127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json new file mode 100644 index 000000000..1ca579f6d --- /dev/null +++ b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-1.3B/1762652579.59305", + "retrieved_timestamp": "1762652579.59305", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/gpt-neo-1.3B", + "developer": "openai", + "inference_platform": "unknown", + "id": "EleutherAI/gpt-neo-1.3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20790502533278366 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30392315869356407 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 1.366 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json new file mode 100644 index 000000000..2a048d738 --- /dev/null +++ b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-125m/1762652579.593268", + "retrieved_timestamp": "1762652579.593268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/gpt-neo-125m", + "developer": "openai", + "inference_platform": "unknown", + "id": "EleutherAI/gpt-neo-125m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19054442213327305 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3115156885791523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3593333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10255984042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 0.15 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json new file mode 100644 index 000000000..c63d30341 --- /dev/null +++ b/data/hfopenllm_v2/openai/EleutherAI/gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-2.7B/1762652579.5934908", + "retrieved_timestamp": "1762652579.5934908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/gpt-neo-2.7B", + "developer": "openai", + "inference_platform": "unknown", + "id": "EleutherAI/gpt-neo-2.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2589628851447493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3139516033315253 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3553645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11627327127659574 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 2.718 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json b/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json new file mode 100644 index 000000000..fbde21f17 --- /dev/null +++ b/data/hfopenllm_v2/openai/EleutherAI/gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neox-20b/1762652579.5937028", + "retrieved_timestamp": "1762652579.593704", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "EleutherAI/gpt-neox-20b", + "developer": "openai", + "inference_platform": "unknown", + "id": "EleutherAI/gpt-neox-20b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2586880587951081 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31650380320877564 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36466666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1155252659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 20.739 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json b/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json new file mode 100644 index 000000000..9b826c1c2 --- /dev/null +++ b/data/hfopenllm_v2/openai/Kimargin/GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Kimargin_GPT-NEO-1.3B-wiki/1762652579.6992168", + "retrieved_timestamp": "1762652579.699218", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Kimargin/GPT-NEO-1.3B-wiki", + "developer": "openai", + "inference_platform": "unknown", + "id": "Kimargin/GPT-NEO-1.3B-wiki" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19206815693471102 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3026339952046975 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10987367021276596 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoForCausalLM", + "params_billions": 1.316 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json b/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json new file mode 100644 index 000000000..dfeb6e3ad --- /dev/null +++ b/data/hfopenllm_v2/openai/NYTK/PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/NYTK_PULI-GPTrio/1762652579.769266", + "retrieved_timestamp": "1762652579.769266", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "NYTK/PULI-GPTrio", + "developer": "openai", + "inference_platform": "unknown", + "id": "NYTK/PULI-GPTrio" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21797164855915638 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30600290906237543 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38187499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11369680851063829 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.673 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json b/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json new file mode 100644 index 000000000..ea3a1a2ee --- /dev/null +++ b/data/hfopenllm_v2/openai/Sharathhebbar24/chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/Sharathhebbar24_chat_gpt2_dpo/1762652579.8799832", + "retrieved_timestamp": "1762652579.8799841", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "Sharathhebbar24/chat_gpt2_dpo", + "developer": "openai", + "inference_platform": "unknown", + "id": "Sharathhebbar24/chat_gpt2_dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09861944086135896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29022988561565644 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38184375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json b/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json new file mode 100644 index 000000000..f1c026e20 --- /dev/null +++ b/data/hfopenllm_v2/openai/distilbert/distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/distilbert_distilgpt2/1762652580.1266282", + "retrieved_timestamp": "1762652580.126629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "distilbert/distilgpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "distilbert/distilgpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06110010328151527 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3037988148650536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42072916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11868351063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.088 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json b/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json new file mode 100644 index 000000000..583e7ce09 --- /dev/null +++ b/data/hfopenllm_v2/openai/gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gpt2/1762652580.1809301", + "retrieved_timestamp": "1762652580.180931", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai/gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1934168007553292 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036385401516729 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0030211480362537764 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43241666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1149434840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json b/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json new file mode 100644 index 000000000..36ae93460 --- /dev/null +++ b/data/hfopenllm_v2/openai/gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/gpt2/1762652580.181142", + "retrieved_timestamp": "1762652580.181143", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai/gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08333333333333333 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30833333333333335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23333333333333334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4333333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json b/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json new file mode 100644 index 000000000..47b7c2195 --- /dev/null +++ b/data/hfopenllm_v2/openai/langgptai/Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/langgptai_Qwen-las-v0.1/1762652580.313808", + "retrieved_timestamp": "1762652580.313809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "langgptai/Qwen-las-v0.1", + "developer": "openai", + "inference_platform": "unknown", + "id": "langgptai/Qwen-las-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33010412372504955 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38925525629956187 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37009374999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2325465425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.901 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json b/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json new file mode 100644 index 000000000..4e3afd2bf --- /dev/null +++ b/data/hfopenllm_v2/openai/meraGPT/mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/meraGPT_mera-mix-4x7B/1762652580.345789", + "retrieved_timestamp": "1762652580.34579", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "meraGPT/mera-mix-4x7B", + "developer": "openai", + "inference_platform": "unknown", + "id": "meraGPT/mera-mix-4x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4831779677921249 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40189899163661713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40565625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27476728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 24.154 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json b/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json new file mode 100644 index 000000000..e4b7e0c8d --- /dev/null +++ b/data/hfopenllm_v2/openai/microsoft/DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/microsoft_DialoGPT-medium/1762652580.353813", + "retrieved_timestamp": "1762652580.3538141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "microsoft/DialoGPT-medium", + "developer": "openai", + "inference_platform": "unknown", + "id": "microsoft/DialoGPT-medium" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14790422744983311 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3014156380141994 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1118683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.345 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json b/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json new file mode 100644 index 000000000..ade51d12d --- /dev/null +++ b/data/hfopenllm_v2/openai/openai-community/gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-large/1762652580.4297202", + "retrieved_timestamp": "1762652580.429721", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openai-community/gpt2-large", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai-community/gpt2-large" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20478220011790937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30688418760118824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3788645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.812 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json b/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json new file mode 100644 index 000000000..1468d12d9 --- /dev/null +++ b/data/hfopenllm_v2/openai/openai-community/gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-medium/1762652580.4299362", + "retrieved_timestamp": "1762652580.429937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openai-community/gpt2-medium", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai-community/gpt2-medium" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22084402718121252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3050280232176266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11818484042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.38 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json b/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json new file mode 100644 index 000000000..c4c9ff0a9 --- /dev/null +++ b/data/hfopenllm_v2/openai/openai-community/gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2-xl/1762652580.430138", + "retrieved_timestamp": "1762652580.430138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openai-community/gpt2-xl", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai-community/gpt2-xl" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20385798570016445 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30085761123260785 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37095833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11311502659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 1.608 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json b/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json new file mode 100644 index 000000000..5cd981ef5 --- /dev/null +++ b/data/hfopenllm_v2/openai/openai-community/gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.429537", + "retrieved_timestamp": "1762652580.429537", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openai-community/gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai-community/gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17795449407571912 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30165801067653053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.005287009063444109 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43902083333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11652260638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json b/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json new file mode 100644 index 000000000..b91d1349f --- /dev/null +++ b/data/hfopenllm_v2/openai/openai-community/gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.42929", + "retrieved_timestamp": "1762652580.429291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openai-community/gpt2", + "developer": "openai", + "inference_platform": "unknown", + "id": "openai-community/gpt2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17925327021192655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3035711244213359 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44705208333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11594082446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.137 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json b/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json new file mode 100644 index 000000000..33fc078b9 --- /dev/null +++ b/data/hfopenllm_v2/openai/postbot/gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/postbot_gpt2-medium-emailgen/1762652580.4421701", + "retrieved_timestamp": "1762652580.4421709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "postbot/gpt2-medium-emailgen", + "developer": "openai", + "inference_platform": "unknown", + "id": "postbot/gpt2-medium-emailgen" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1492030035860406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31304286003933807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3911145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1146941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.38 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json b/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json new file mode 100644 index 000000000..7bd084c3a --- /dev/null +++ b/data/hfopenllm_v2/openai/sumink/ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_ftgpt/1762652580.5475452", + "retrieved_timestamp": "1762652580.5475461", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/ftgpt", + "developer": "openai", + "inference_platform": "unknown", + "id": "sumink/ftgpt" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0787100449030794 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29190853217047663 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41384375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1171875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "GPT2LMHeadModel", + "params_billions": 0.124 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json b/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json new file mode 100644 index 000000000..6e5846045 --- /dev/null +++ b/data/hfopenllm_v2/openai/togethercomputer/GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-JT-6B-v1/1762652580.574097", + "retrieved_timestamp": "1762652580.5740979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/GPT-JT-6B-v1", + "developer": "openai", + "inference_platform": "unknown", + "id": "togethercomputer/GPT-JT-6B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20610646418170453 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33026609127426704 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37365625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16256648936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTJForCausalLM", + "params_billions": 6.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json b/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json new file mode 100644 index 000000000..acb735e5e --- /dev/null +++ b/data/hfopenllm_v2/openai/universalml/NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/universalml_NepaliGPT-2.0/1762652580.578092", + "retrieved_timestamp": "1762652580.578093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "universalml/NepaliGPT-2.0", + "developer": "openai", + "inference_platform": "unknown", + "id": "universalml/NepaliGPT-2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03649538779327739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46604761322722105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4656770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3299534574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json new file mode 100644 index 000000000..462b6ac50 --- /dev/null +++ b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-3B-Multilingual/1762652580.611115", + "retrieved_timestamp": "1762652580.611116", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yuchenxie/ArlowGPT-3B-Multilingual", + "developer": "openai", + "inference_platform": "unknown", + "id": "yuchenxie/ArlowGPT-3B-Multilingual" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6395486198841297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4301403132173714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37266666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2816655585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json new file mode 100644 index 000000000..103fcf9be --- /dev/null +++ b/data/hfopenllm_v2/openai/yuchenxie/ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-8B/1762652580.611377", + "retrieved_timestamp": "1762652580.611378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yuchenxie/ArlowGPT-8B", + "developer": "openai", + "inference_platform": "unknown", + "id": "yuchenxie/ArlowGPT-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7846536079823756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5080162816130412 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.378656914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json b/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json new file mode 100644 index 000000000..d3973e02e --- /dev/null +++ b/data/hfopenllm_v2/openbmb/MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openbmb_MiniCPM-S-1B-sft-llama-format/1762652580.430347", + "retrieved_timestamp": "1762652580.430348", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openbmb/MiniCPM-S-1B-sft-llama-format", + "developer": "openbmb", + "inference_platform": "unknown", + "id": "openbmb/MiniCPM-S-1B-sft-llama-format" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3328767669782843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30493136322070497 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33167708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1858377659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json b/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json new file mode 100644 index 000000000..e5855b313 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-0106/1762652580.430586", + "retrieved_timestamp": "1762652580.4305868", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat-3.5-0106", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat-3.5-0106" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5966590867786362 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46169787083960595 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42543749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3291223404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json b/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json new file mode 100644 index 000000000..f9d9e8c7f --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-1210/1762652580.430838", + "retrieved_timestamp": "1762652580.430839", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat-3.5-1210", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat-3.5-1210" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.603678240402133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4535356846447984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4414375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3142453457446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json b/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json new file mode 100644 index 000000000..f554bad3f --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat-3.6-8b-20240522/1762652580.4310489", + "retrieved_timestamp": "1762652580.43105", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat-3.6-8b-20240522", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat-3.6-8b-20240522" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5343355629729118 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338412089001999 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3998541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32288896276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json b/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json new file mode 100644 index 000000000..fce727c97 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat_3.5/1762652580.431262", + "retrieved_timestamp": "1762652580.431263", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat_3.5", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat_3.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5931118321608887 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44263196862832893 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4228645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31532579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json b/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json new file mode 100644 index 000000000..dfb64c94f --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2/1762652580.431712", + "retrieved_timestamp": "1762652580.431714", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat_v3.2", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat_v3.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2980558252104416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4330564283474314 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.433625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2421875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json b/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json new file mode 100644 index 000000000..72db402a3 --- /dev/null +++ b/data/hfopenllm_v2/openchat/openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2_super/1762652580.431961", + "retrieved_timestamp": "1762652580.431962", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "openchat/openchat_v3.2_super", + "developer": "openchat", + "inference_platform": "unknown", + "id": "openchat/openchat_v3.2_super" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2861906408329898 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42212089838803973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41613541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24251994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json b/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json new file mode 100644 index 000000000..3284a538c --- /dev/null +++ b/data/hfopenllm_v2/oxyapi/oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/oxyapi_oxy-1-small/1762652580.432582", + "retrieved_timestamp": "1762652580.432582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "oxyapi/oxy-1-small", + "developer": "oxyapi", + "inference_platform": "unknown", + "id": "oxyapi/oxy-1-small" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6244608749229821 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5884593784818278 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36027190332326287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5000831117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json b/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json new file mode 100644 index 000000000..69059d21b --- /dev/null +++ b/data/hfopenllm_v2/ozone-ai/0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ozone-ai_0x-lite/1762652580.432846", + "retrieved_timestamp": "1762652580.432847", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ozone-ai/0x-lite", + "developer": "ozone-ai", + "inference_platform": "unknown", + "id": "ozone-ai/0x-lite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7739874643723099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6340580988016683 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31963087248322153 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4220625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5183676861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json b/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json new file mode 100644 index 000000000..19fba9249 --- /dev/null +++ b/data/hfopenllm_v2/ozone-research/Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ozone-research_Chirp-01/1762652580.433142", + "retrieved_timestamp": "1762652580.4331431", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ozone-research/Chirp-01", + "developer": "ozone-research", + "inference_platform": "unknown", + "id": "ozone-research/Chirp-01" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6347524568145853 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4649560260501419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3466767371601209 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2718120805369128 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4487291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3508144946808511 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json new file mode 100644 index 000000000..a9ee98b1d --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V1/1762652580.433397", + "retrieved_timestamp": "1762652580.433398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paloalma/ECE-TW3-JRGL-V1", + "developer": "paloalma", + "inference_platform": "unknown", + "id": "paloalma/ECE-TW3-JRGL-V1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5534947273235016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6283667540784627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34731543624161076 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46208333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.422124335106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json new file mode 100644 index 000000000..2b5799638 --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V2/1762652580.433646", + "retrieved_timestamp": "1762652580.4336472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paloalma/ECE-TW3-JRGL-V2", + "developer": "paloalma", + "inference_platform": "unknown", + "id": "paloalma/ECE-TW3-JRGL-V2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2254894790267601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6030988136029874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18504531722054382 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47932291666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4587765957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.288 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json new file mode 100644 index 000000000..f67c6514b --- /dev/null +++ b/data/hfopenllm_v2/paloalma/ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V5/1762652580.433843", + "retrieved_timestamp": "1762652580.4338439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paloalma/ECE-TW3-JRGL-V5", + "developer": "paloalma", + "inference_platform": "unknown", + "id": "paloalma/ECE-TW3-JRGL-V5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4552509563513699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6024712037668832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18353474320241692 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3414429530201342 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4620520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46476063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json b/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json new file mode 100644 index 000000000..661f3ca6a --- /dev/null +++ b/data/hfopenllm_v2/paloalma/Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paloalma_Le_Triomphant-ECE-TW3/1762652580.434039", + "retrieved_timestamp": "1762652580.434039", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paloalma/Le_Triomphant-ECE-TW3", + "developer": "paloalma", + "inference_platform": "unknown", + "id": "paloalma/Le_Triomphant-ECE-TW3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5402055435134332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6112057897556996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19486404833836857 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.476313164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json b/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json new file mode 100644 index 000000000..3d5869922 --- /dev/null +++ b/data/hfopenllm_v2/paloalma/TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paloalma_TW3-JRGL-v2/1762652580.43424", + "retrieved_timestamp": "1762652580.434241", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paloalma/TW3-JRGL-v2", + "developer": "paloalma", + "inference_platform": "unknown", + "id": "paloalma/TW3-JRGL-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316127874040878 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6137525505395743 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17900302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48583333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4857878989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 72.289 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json b/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json new file mode 100644 index 000000000..e85fb75be --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_Al_Dente_v1_8b/1762652580.434438", + "retrieved_timestamp": "1762652580.434439", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/Al_Dente_v1_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/Al_Dente_v1_8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693721547715617 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48347371404380524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3987083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2859873670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json b/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json new file mode 100644 index 000000000..beaf1a100 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_model_007_13b_v2/1762652580.434693", + "retrieved_timestamp": "1762652580.4346938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/model_007_13b_v2", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/model_007_13b_v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30564901129004374 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4702292766687601 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46109375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24609375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json b/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json new file mode 100644 index 000000000..a1238a456 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_3b/1762652580.434913", + "retrieved_timestamp": "1762652580.434913", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_3b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07421419611076388 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196070040004752 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3349270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11452792553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.426 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json b/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json new file mode 100644 index 000000000..3b8637f57 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_7b/1762652580.435124", + "retrieved_timestamp": "1762652580.4351249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04121619525082337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3332228472650342 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12458444148936171 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json new file mode 100644 index 000000000..59209398a --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v2_7b/1762652580.435575", + "retrieved_timestamp": "1762652580.435576", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v2_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v2_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13578859647956312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35363417847864514 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24916107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35933333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1541722074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json new file mode 100644 index 000000000..8c6901ec8 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_13b/1762652580.435779", + "retrieved_timestamp": "1762652580.43578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v3_13b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v3_13b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28966253983873896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4710970361474938 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45979166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23046875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json new file mode 100644 index 000000000..e01373dca --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_70b/1762652580.43598", + "retrieved_timestamp": "1762652580.435981", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v3_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v3_70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4014703209705803 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5949312065598904 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179530201342282 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5078541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3757480053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json new file mode 100644 index 000000000..86b445032 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_7b/1762652580.436181", + "retrieved_timestamp": "1762652580.436182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v3_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v3_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2820937335159599 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095332668279368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49823958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20836103723404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json new file mode 100644 index 000000000..3883087b9 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b/1762652580.436376", + "retrieved_timestamp": "1762652580.436377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v5_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v5_8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48060479527653294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5064242853619262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4000104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3075964095744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json new file mode 100644 index 000000000..ad7102b9e --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_dpo/1762652580.436573", + "retrieved_timestamp": "1762652580.436574", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v5_8b_dpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v5_8b_dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48964746871633935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5074598658862709 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.389375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31158577127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json new file mode 100644 index 000000000..fd2704ca2 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_orpo/1762652580.436766", + "retrieved_timestamp": "1762652580.4367669", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v5_8b_orpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v5_8b_orpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08243239050164675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.496374377369289 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41312499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2947140957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json new file mode 100644 index 000000000..1767992d6 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b/1762652580.436963", + "retrieved_timestamp": "1762652580.436963", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v6_8b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v6_8b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011116060940526692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30286959112076134 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0037764350453172208 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3554583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1124501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json new file mode 100644 index 000000000..7310f4447 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b_dpo/1762652580.43716", + "retrieved_timestamp": "1762652580.437161", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v6_8b_dpo", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v6_8b_dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3882564927725103 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520280774453148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06117824773413897 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40903125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.359624335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json new file mode 100644 index 000000000..1ef5db330 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_72b/1762652580.437353", + "retrieved_timestamp": "1762652580.437354", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v7_72b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v7_72b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5929622291076566 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6842301988001044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5070416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5621675531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json new file mode 100644 index 000000000..3d68c6563 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_7b/1762652580.437545", + "retrieved_timestamp": "1762652580.437546", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v7_7b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v7_7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387646998851935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5274909601771501 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43597916666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4167220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json new file mode 100644 index 000000000..17ae6058e --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v8_1_70b/1762652580.4377441", + "retrieved_timestamp": "1762652580.4377449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v8_1_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v8_1_70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8571434903832941 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6781305630707934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43288590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44370833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49833776595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json new file mode 100644 index 000000000..ddd4692f7 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_0_3B-Instruct/1762652580.437941", + "retrieved_timestamp": "1762652580.437942", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_0_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_0_3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5753766672429155 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4412946064233128 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36590625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2603058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json new file mode 100644 index 000000000..0c7077561 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_1_1B-Instruct/1762652580.438177", + "retrieved_timestamp": "1762652580.438178", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_1_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_1_1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3629270336041702 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3205118362595434 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04607250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3380625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13738364361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json new file mode 100644 index 000000000..d5573ac5e --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_14B/1762652580.438377", + "retrieved_timestamp": "1762652580.438378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_2_14B", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_2_14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7780588837617521 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6856329737542378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29531722054380666 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47030208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5255152925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json new file mode 100644 index 000000000..cc570a6d5 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_70b/1762652580.438577", + "retrieved_timestamp": "1762652580.438578", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_2_70b", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_2_70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8382591523823455 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6744868732778627 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2938066465256798 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47098958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48213098404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json new file mode 100644 index 000000000..fccb50707 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_4_70B/1762652580.438774", + "retrieved_timestamp": "1762652580.438774", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_4_70B", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_4_70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8014645584826039 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6418899297276105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36577181208053694 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4647291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45362367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json new file mode 100644 index 000000000..36629e1ff --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct/1762652580.438983", + "retrieved_timestamp": "1762652580.438984", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_5_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_5_1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46379384477630464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3337001077145985 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13696808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json new file mode 100644 index 000000000..91ca5fdf8 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/1762652580.439178", + "retrieved_timestamp": "1762652580.439179", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3935768206137493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32769514793198123 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33945833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13272938829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json new file mode 100644 index 000000000..a2a5ea910 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_3B-Instruct/1762652580.439394", + "retrieved_timestamp": "1762652580.4393952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_5_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_5_3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7207066140063919 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44963802133275826 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1321752265861027 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2869127516778524 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4269895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2882313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json new file mode 100644 index 000000000..61677f447 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_1B-Instruct/1762652580.439626", + "retrieved_timestamp": "1762652580.439627", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_6_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_6_1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6085741388404988 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3561349568441982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0770392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33955208333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18085106382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json new file mode 100644 index 000000000..e286fe444 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_3B-Instruct/1762652580.439853", + "retrieved_timestamp": "1762652580.439853", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_6_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_6_3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7316475839660989 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45683272658133456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13293051359516617 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4067708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28507313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json new file mode 100644 index 000000000..19c14b901 --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_1B-Instruct/1762652580.4400692", + "retrieved_timestamp": "1762652580.44007", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_7_1B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_7_1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5610136659618701 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181526961435924 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35269791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1344747340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json new file mode 100644 index 000000000..ba68896ca --- /dev/null +++ b/data/hfopenllm_v2/pankajmathur/orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_3B-Instruct/1762652580.44028", + "retrieved_timestamp": "1762652580.4402812", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pankajmathur/orca_mini_v9_7_3B-Instruct", + "developer": "pankajmathur", + "inference_platform": "unknown", + "id": "pankajmathur/orca_mini_v9_7_3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5618381450107935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3297133908231881 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.361875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13746675531914893 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json b/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json new file mode 100644 index 000000000..3012fb1b9 --- /dev/null +++ b/data/hfopenllm_v2/paulml/ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/paulml_ECE-ILAB-Q1/1762652580.440484", + "retrieved_timestamp": "1762652580.440484", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "paulml/ECE-ILAB-Q1", + "developer": "paulml", + "inference_platform": "unknown", + "id": "paulml/ECE-ILAB-Q1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7864521691334547 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6717755530661759 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3557401812688822 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46137500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.550531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json new file mode 100644 index 000000000..8dcfca1b3 --- /dev/null +++ b/data/hfopenllm_v2/pints-ai/1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-16K-v0.1/1762652580.4407208", + "retrieved_timestamp": "1762652580.440722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pints-ai/1.5-Pints-16K-v0.1", + "developer": "pints-ai", + "inference_platform": "unknown", + "id": "pints-ai/1.5-Pints-16K-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1635914927946737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3133077677150869 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23573825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.357875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1118683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.566 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json b/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json new file mode 100644 index 000000000..dd565b0d3 --- /dev/null +++ b/data/hfopenllm_v2/pints-ai/1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-2K-v0.1/1762652580.4409652", + "retrieved_timestamp": "1762652580.440966", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "pints-ai/1.5-Pints-2K-v0.1", + "developer": "pints-ai", + "inference_platform": "unknown", + "id": "pints-ai/1.5-Pints-2K-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17615593292463996 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29801943389750435 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35018749999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11037234042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.566 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json b/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json new file mode 100644 index 000000000..28a053e0d --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-3b-25r/1762652580.44117", + "retrieved_timestamp": "1762652580.441171", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "piotr25691/thea-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "id": "piotr25691/thea-3b-25r" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7344202272193336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44844100293649863 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1782477341389728 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33145833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3182347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json b/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json new file mode 100644 index 000000000..f0b23ec88 --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-c-3b-25r/1762652580.441559", + "retrieved_timestamp": "1762652580.441561", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "piotr25691/thea-c-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "id": "piotr25691/thea-c-3b-25r" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7401904723910335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4532410175874399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15256797583081572 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33148958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3178191489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json b/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json new file mode 100644 index 000000000..ec9258bd6 --- /dev/null +++ b/data/hfopenllm_v2/piotr25691/thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/piotr25691_thea-rp-3b-25r/1762652580.441917", + "retrieved_timestamp": "1762652580.441918", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "piotr25691/thea-rp-3b-25r", + "developer": "piotr25691", + "inference_platform": "unknown", + "id": "piotr25691/thea-rp-3b-25r" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6577835698169745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4390291036559586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13217522658610273 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.381875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30601728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json b/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json new file mode 100644 index 000000000..2f6e1738e --- /dev/null +++ b/data/hfopenllm_v2/prince-canuma/Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prince-canuma_Ministral-8B-Instruct-2410-HF/1762652580.442474", + "retrieved_timestamp": "1762652580.442475", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prince-canuma/Ministral-8B-Instruct-2410-HF", + "developer": "prince-canuma", + "inference_platform": "unknown", + "id": "prince-canuma/Ministral-8B-Instruct-2410-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5911636679565775 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4585611339334732 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19184290030211482 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32978723404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.02 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json new file mode 100644 index 000000000..9997aa1b2 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4434712", + "retrieved_timestamp": "1762652580.443472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3977734632996006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49830327201612584 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3246343085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json new file mode 100644 index 000000000..0ede079bf --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4431858", + "retrieved_timestamp": "1762652580.443187", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5508218194390884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5028310716285619 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42664583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32313829787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json new file mode 100644 index 000000000..c5da9955f --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/1762652580.443907", + "retrieved_timestamp": "1762652580.4439082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5563172382611471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5083040804243396 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2953020134228188 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43969791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32746010638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json new file mode 100644 index 000000000..ea5dfb470 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-CPO/1762652580.444415", + "retrieved_timestamp": "1762652580.444416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37034623687371726 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4594875922440002 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3608541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2976230053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json new file mode 100644 index 000000000..197310cad --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-DPO/1762652580.444683", + "retrieved_timestamp": "1762652580.444684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41111251479407973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46658506064913546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38673958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3078457446808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json new file mode 100644 index 000000000..8d9234eb1 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-IPO/1762652580.444937", + "retrieved_timestamp": "1762652580.444937", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4486562321307464 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4690068582318399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3919479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3115026595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json new file mode 100644 index 000000000..e127fd411 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-KTO/1762652580.4452229", + "retrieved_timestamp": "1762652580.445225", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4522533544329047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4692852292721417 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.052870090634441085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3841979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3054355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json new file mode 100644 index 000000000..645b2c798 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/1762652580.445469", + "retrieved_timestamp": "1762652580.4454699", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45165383404921167 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47340573024653915 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3706770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30826130319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json new file mode 100644 index 000000000..7cb36b1ef --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/1762652580.445683", + "retrieved_timestamp": "1762652580.445684", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4480068440626427 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46620140448752295 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05740181268882175 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3062080536912752 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4027395833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30144614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json new file mode 100644 index 000000000..60e21e9e9 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/1762652580.445896", + "retrieved_timestamp": "1762652580.445896", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3357247658435174 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4520360167602379 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37222916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2888962765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json new file mode 100644 index 000000000..3434be841 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/1762652580.4460979", + "retrieved_timestamp": "1762652580.446099", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4890479483326463 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4704075127777334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40909375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30634973404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json new file mode 100644 index 000000000..744a098d2 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/1762652580.446312", + "retrieved_timestamp": "1762652580.446312", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4685401401614383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47412507033960827 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41268750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31050531914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json new file mode 100644 index 000000000..3be46eca1 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT/1762652580.444184", + "retrieved_timestamp": "1762652580.444185", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Base-8B-SFT", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Base-8B-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27959591661236627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.464303802632615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3093417553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json new file mode 100644 index 000000000..92bd2dd8b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/1762652580.44678", + "retrieved_timestamp": "1762652580.446781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7505817896514582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5026669871217129 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10800604229607251 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36190625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37059507978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json new file mode 100644 index 000000000..2ac326aa3 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO/1762652580.4465249", + "retrieved_timestamp": "1762652580.446526", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-CPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7292993701157373 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4998793158888361 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35139583333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36519281914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json new file mode 100644 index 000000000..67cfe7ddd --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/1762652580.447217", + "retrieved_timestamp": "1762652580.447217", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7208063493752133 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.505620320855615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08987915407854985 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3844479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37691156914893614 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json new file mode 100644 index 000000000..8a27fb918 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO/1762652580.447003", + "retrieved_timestamp": "1762652580.447003", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6757436934001781 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4991303079139502 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27181208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37381250000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36652260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json new file mode 100644 index 000000000..cbfd096e9 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/1762652580.447652", + "retrieved_timestamp": "1762652580.447653", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7290245437660962 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5079766897761946 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3667719414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json new file mode 100644 index 000000000..d081bea14 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO/1762652580.4474308", + "retrieved_timestamp": "1762652580.447432", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6864098370102439 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4981903187457697 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36984374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35987367021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json new file mode 100644 index 000000000..1361dbbc7 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/1762652580.448072", + "retrieved_timestamp": "1762652580.448073", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7633213207622442 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507835231782556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37796874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37308843085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json new file mode 100644 index 000000000..c301a8aa3 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO/1762652580.447865", + "retrieved_timestamp": "1762652580.4478662", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.712813113649561 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5001206199104097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35018750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36461103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json new file mode 100644 index 000000000..748934115 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1762652580.448503", + "retrieved_timestamp": "1762652580.448504", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7076922565459647 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5049218189829557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804479166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37741023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json new file mode 100644 index 000000000..ecf3cdff5 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO/1762652580.448289", + "retrieved_timestamp": "1762652580.44829", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6660017642078574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5033626077797596 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3752083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36070478723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json new file mode 100644 index 000000000..8db1ef1b5 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/1762652580.4489532", + "retrieved_timestamp": "1762652580.448954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.712488419615509 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49838952572927536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3482380319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json new file mode 100644 index 000000000..783e002cc --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF/1762652580.4487302", + "retrieved_timestamp": "1762652580.448731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274509412802475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49105468765647214 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3475520833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36436170212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json new file mode 100644 index 000000000..92ac74a92 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/1762652580.4493709", + "retrieved_timestamp": "1762652580.4493718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7109646848140712 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49838952572927536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3482380319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json new file mode 100644 index 000000000..8375e292a --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/1762652580.449163", + "retrieved_timestamp": "1762652580.449164", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7399655137258031 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5029422936734547 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3722916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35846077127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json new file mode 100644 index 000000000..e99041135 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/1762652580.44994", + "retrieved_timestamp": "1762652580.449941", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6808645505037745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.503833834044343 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3988020833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36220079787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json new file mode 100644 index 000000000..fbea90fc0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO/1762652580.449708", + "retrieved_timestamp": "1762652580.449709", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6503898544750152 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48446848524905367 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39483333333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3489029255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json new file mode 100644 index 000000000..a64f55258 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-CPO/1762652580.45017", + "retrieved_timestamp": "1762652580.450171", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46549267055856236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43821512506663574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26512632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json new file mode 100644 index 000000000..8b8dba23e --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-DPO/1762652580.450392", + "retrieved_timestamp": "1762652580.4503932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44033830237104216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43501123979612694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41222916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26454454787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json new file mode 100644 index 000000000..bd624ee84 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-IPO/1762652580.4506009", + "retrieved_timestamp": "1762652580.450602", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48295300912689443 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4458024605899282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37762500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2791722074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json new file mode 100644 index 000000000..1a24da1d0 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-KTO/1762652580.450817", + "retrieved_timestamp": "1762652580.450818", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.478481540091402 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44764334464528677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43678124999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28715093085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json new file mode 100644 index 000000000..69584298b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RDPO/1762652580.451031", + "retrieved_timestamp": "1762652580.4510322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46064663980460735 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44395328626924213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02190332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3579375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27767619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json new file mode 100644 index 000000000..ecfe62500 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RRHF/1762652580.451245", + "retrieved_timestamp": "1762652580.451246", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44066299640509404 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42805937403716016 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23977726063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json new file mode 100644 index 000000000..073780428 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/1762652580.451465", + "retrieved_timestamp": "1762652580.451466", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5127284494031392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44223991890402176 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.035498489425981876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42608333333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2780917553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json new file mode 100644 index 000000000..12fb70412 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SimPO/1762652580.4516768", + "retrieved_timestamp": "1762652580.451678", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47006387496287627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4398050727924064 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39706250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27019614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json new file mode 100644 index 000000000..bf2cc1a03 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-CPO/1762652580.4518862", + "retrieved_timestamp": "1762652580.4518871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-CPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-CPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4203047912871182 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.406922267565148 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701130319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json new file mode 100644 index 000000000..6da186d99 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-DPO/1762652580.4521", + "retrieved_timestamp": "1762652580.4521", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.517624347841505 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4060358459697702 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030966767371601207 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3833333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2748503989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json new file mode 100644 index 000000000..db9b520e4 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-IPO/1762652580.45231", + "retrieved_timestamp": "1762652580.45231", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-IPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-IPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4929198969844457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4322183023180588 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43241666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2707779255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json new file mode 100644 index 000000000..279fa8513 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-KTO/1762652580.452526", + "retrieved_timestamp": "1762652580.452527", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-KTO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-KTO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4907966417993147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4139586477181159 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3952708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28125 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json new file mode 100644 index 000000000..6a013df61 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-ORPO/1762652580.452744", + "retrieved_timestamp": "1762652580.452745", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-ORPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-ORPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4719621714827768 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41040615756566107 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3912395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2662067819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json new file mode 100644 index 000000000..71434b79d --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RDPO/1762652580.452956", + "retrieved_timestamp": "1762652580.452957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-RDPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-RDPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4887232542985944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40501479745073615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27767619680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json new file mode 100644 index 000000000..9c52fe1dc --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RRHF/1762652580.4531672", + "retrieved_timestamp": "1762652580.4531682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-RRHF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-RRHF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49601723427173233 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41897663476657404 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.397875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26512632978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json new file mode 100644 index 000000000..918b74b77 --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/1762652580.453388", + "retrieved_timestamp": "1762652580.4533892", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5115294086357531 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040013641288438 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39130208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27152593085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json new file mode 100644 index 000000000..62eb7a95b --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SimPO/1762652580.45361", + "retrieved_timestamp": "1762652580.45361", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/Mistral-7B-Instruct-SimPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/Mistral-7B-Instruct-SimPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4686897432146704 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4507226157033399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40978125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2796708776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json new file mode 100644 index 000000000..a6a163dcf --- /dev/null +++ b/data/hfopenllm_v2/princeton-nlp/gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-DPO/1762652580.454305", + "retrieved_timestamp": "1762652580.4543061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "princeton-nlp/gemma-2-9b-it-DPO", + "developer": "princeton-nlp", + "inference_platform": "unknown", + "id": "princeton-nlp/gemma-2-9b-it-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27687203287277756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5941444682956648 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723404255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json new file mode 100644 index 000000000..da26d1444 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-1.5B-xElite/1762652580.4551811", + "retrieved_timestamp": "1762652580.455182", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Bellatrix-1.5B-xElite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Bellatrix-1.5B-xElite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1964144026737944 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35011984799236834 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36190625000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1657247340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json new file mode 100644 index 000000000..ec499733f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1.5B-R1/1762652580.455581", + "retrieved_timestamp": "1762652580.455582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33522498082864577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40221745714531076 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3682916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27509973404255317 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json new file mode 100644 index 000000000..3d929cc3a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1B-v2/1762652580.4558249", + "retrieved_timestamp": "1762652580.4558249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Bellatrix-Tiny-1B-v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Bellatrix-Tiny-1B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15095169705270903 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3267684418723903 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.028700906344410877 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34302083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14926861702127658 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json b/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json new file mode 100644 index 000000000..0ef70497b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Blaze-14B-xElite/1762652580.456049", + "retrieved_timestamp": "1762652580.45605", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Blaze-14B-xElite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Blaze-14B-xElite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03632029681245762 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6627817236091689 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3693353474320242 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46248958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5111369680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json b/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json new file mode 100644 index 000000000..ce61b03b4 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_COCO-7B-Instruct-1M/1762652580.456335", + "retrieved_timestamp": "1762652580.456337", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/COCO-7B-Instruct-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/COCO-7B-Instruct-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4743103853331383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5409956853800891 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3496978851963746 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4382395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41863364361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json new file mode 100644 index 000000000..c00033ca0 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-1M/1762652580.457102", + "retrieved_timestamp": "1762652580.457103", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite-1M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5612884923115112 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6329399079569701 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523489932885906 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46760416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5152094414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json new file mode 100644 index 000000000..33475730e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-Stock/1762652580.457346", + "retrieved_timestamp": "1762652580.4573472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.614294516327788 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6328767168557433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48075 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5284242021276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json new file mode 100644 index 000000000..73f723aff --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456628", + "retrieved_timestamp": "1762652580.456629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6051521075191603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6317361472468987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4788519637462236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4859583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301695478723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json new file mode 100644 index 000000000..63ab78ed9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456884", + "retrieved_timestamp": "1762652580.456885", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6063511482865463 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6295900497885079 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37084592145015105 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48732291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5306682180851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json new file mode 100644 index 000000000..7e6c626d7 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2-R1/1762652580.457828", + "retrieved_timestamp": "1762652580.4578292", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6325793339450436 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6362357624539174 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48998958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5247672872340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json new file mode 100644 index 000000000..2012f8458 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2/1762652580.457599", + "retrieved_timestamp": "1762652580.4576", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6176168122803052 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6318256156619112 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4690332326283988 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49395833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5300864361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json new file mode 100644 index 000000000..afd20f4b9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite3/1762652580.458055", + "retrieved_timestamp": "1762652580.458056", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5428285837134359 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6350402275340573 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4705438066465257 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37080536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4794791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5334940159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json new file mode 100644 index 000000000..580e991a6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite4/1762652580.4582741", + "retrieved_timestamp": "1762652580.458275", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Elite4", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Elite4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6111971790405014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6195264951573699 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36253776435045315 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35570469798657717 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46871875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.514876994680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json new file mode 100644 index 000000000..27a6272f5 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Merge/1762652580.4585001", + "retrieved_timestamp": "1762652580.458503", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-14B-Merge", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-14B-Merge" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4949434168007554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6319290054891645 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4637462235649547 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37080536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48608333333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5355718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json new file mode 100644 index 000000000..7fd29b0c5 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-20B-v1/1762652580.458724", + "retrieved_timestamp": "1762652580.4587252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Calcium-Opus-20B-v1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Calcium-Opus-20B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3092716215197897 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.599033246250772 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36178247734138974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35318791946308725 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49433333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4734042553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 19.173 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json b/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json new file mode 100644 index 000000000..842d0a907 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Codepy-Deepthink-3B/1762652580.458943", + "retrieved_timestamp": "1762652580.458944", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Codepy-Deepthink-3B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Codepy-Deepthink-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43271962836385236 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4259451388094382 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11555891238670694 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3310208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3090093085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json b/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json new file mode 100644 index 000000000..184a0c16a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Coma-II-14B/1762652580.4591591", + "retrieved_timestamp": "1762652580.45916", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Coma-II-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Coma-II-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.416832892281369 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6320713788922736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4001677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5351041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5039893617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json b/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json new file mode 100644 index 000000000..4f1655550 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Condor-Opus-14B-Exp/1762652580.4595032", + "retrieved_timestamp": "1762652580.4595041", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Condor-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Condor-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40431831983581346 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6154220154262888 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5226586102719033 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5014128989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json b/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json new file mode 100644 index 000000000..3f47cae71 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Cygnus-II-14B/1762652580.4597278", + "retrieved_timestamp": "1762652580.459729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Cygnus-II-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Cygnus-II-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6184412913292286 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6660565208074918 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4395770392749245 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46884375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json new file mode 100644 index 000000000..a6d88179e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-14B/1762652580.460205", + "retrieved_timestamp": "1762652580.460206", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Deepthink-Reasoning-14B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Deepthink-Reasoning-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5423542866261519 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6334054936091441 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47315625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5295877659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json new file mode 100644 index 000000000..9543a14d8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-7B/1762652580.460416", + "retrieved_timestamp": "1762652580.460416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Deepthink-Reasoning-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Deepthink-Reasoning-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48400244684104843 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5505070216145282 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33459214501510576 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4432291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43492353723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json b/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json new file mode 100644 index 000000000..70699a51e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Dinobot-Opus-14B-Exp/1762652580.460635", + "retrieved_timestamp": "1762652580.460635", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Dinobot-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Dinobot-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8239958864701216 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6370093752306357 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4979222074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json b/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json new file mode 100644 index 000000000..2e0cffaa2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/1762652580.460851", + "retrieved_timestamp": "1762652580.460852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35423454212600347 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38277850218543213 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3066465256797583 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36596875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2757646276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json b/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json new file mode 100644 index 000000000..f95e5e160 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-1/1762652580.4610822", + "retrieved_timestamp": "1762652580.4610822", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Elita-1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Elita-1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4906470387460826 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6520409113818334 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3429003021148036 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37583892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48341666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5381482712765957 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json b/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json new file mode 100644 index 000000000..c388384f0 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Epimetheus-14B-Axo/1762652580.461361", + "retrieved_timestamp": "1762652580.461361", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Epimetheus-14B-Axo", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Epimetheus-14B-Axo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.554643900406477 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6613340892011862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41012084592145015 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4819583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5304188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json b/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json new file mode 100644 index 000000000..99636afad --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Equuleus-Opus-14B-Exp/1762652580.46158", + "retrieved_timestamp": "1762652580.46158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Equuleus-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Equuleus-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7000735825387749 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6433769213927613 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45845921450151056 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5374002659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json b/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json new file mode 100644 index 000000000..ac87985be --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Eridanus-Opus-14B-r999/1762652580.461785", + "retrieved_timestamp": "1762652580.461786", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Eridanus-Opus-14B-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Eridanus-Opus-14B-r999" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.638574537781974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6583918169279829 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859516616314199 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.476875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361535904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json b/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json new file mode 100644 index 000000000..f5c68d9ed --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Evac-Opus-14B-Exp/1762652580.461996", + "retrieved_timestamp": "1762652580.461997", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Evac-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Evac-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5916135852870383 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6475440673701862 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47278125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5316655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json b/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json new file mode 100644 index 000000000..4eafc23d2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_FastThink-0.5B-Tiny/1762652580.462207", + "retrieved_timestamp": "1762652580.462208", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/FastThink-0.5B-Tiny", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/FastThink-0.5B-Tiny" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25798880304259364 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3205583807088257 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3566354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16489361702127658 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json new file mode 100644 index 000000000..bc9c36796 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview/1762652580.4624221", + "retrieved_timestamp": "1762652580.462423", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/GWQ-9B-Preview", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/GWQ-9B-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5065836425129767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5805745804247511 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39835438829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json new file mode 100644 index 000000000..200bed56a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview2/1762652580.462637", + "retrieved_timestamp": "1762652580.4626381", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/GWQ-9B-Preview2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/GWQ-9B-Preview2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5208967761096114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5797218710843371 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48598958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3996841755319149 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json b/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json new file mode 100644 index 000000000..3b39f82de --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ2b/1762652580.462852", + "retrieved_timestamp": "1762652580.4628532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/GWQ2b", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/GWQ2b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41148707651254224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41433702954085216 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43111458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24725731382978725 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json b/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json new file mode 100644 index 000000000..d0376ff0e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Gaea-Opus-14B-Exp/1762652580.463063", + "retrieved_timestamp": "1762652580.463063", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Gaea-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Gaea-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5956351369920699 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6560465337491567 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48589583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5400598404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json b/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json new file mode 100644 index 000000000..d7e473e6a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Gauss-Opus-14B-R999/1762652580.463757", + "retrieved_timestamp": "1762652580.463758", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Gauss-Opus-14B-R999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Gauss-Opus-14B-R999" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39065457430728245 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6227831608555382 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5338333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.500748005319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json b/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json new file mode 100644 index 000000000..2afc4b2ca --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Jolt-v0.1/1762652580.463978", + "retrieved_timestamp": "1762652580.463979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Jolt-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Jolt-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092066827129793 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6521408461659391 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3564954682779456 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48471875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5386469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json b/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json new file mode 100644 index 000000000..3ecaf1405 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Lacerta-Opus-14B-Elite8/1762652580.464193", + "retrieved_timestamp": "1762652580.464193", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Lacerta-Opus-14B-Elite8", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Lacerta-Opus-14B-Elite8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.614144913274556 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6401384743047456 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4635416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5321642287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json new file mode 100644 index 000000000..d9bf57701 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-5B-Instruct/1762652580.464407", + "retrieved_timestamp": "1762652580.4644082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-3.1-5B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-3.1-5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14066011516110588 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3051074188361172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015105740181268883 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35400000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11835106382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.413 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json new file mode 100644 index 000000000..fd9ed6eb9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-8B-Open-SFT/1762652580.464622", + "retrieved_timestamp": "1762652580.4646232", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Llama-3.1-8B-Open-SFT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Llama-3.1-8B-Open-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4122616878770551 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4967982234773378 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1216012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39036458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222739361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json b/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json new file mode 100644 index 000000000..eb7a36f50 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-10B-Instruct/1762652580.4662411", + "retrieved_timestamp": "1762652580.466242", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/LwQ-10B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/LwQ-10B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934770852449279 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5121712029712329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45439583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.331781914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json b/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json new file mode 100644 index 000000000..0e343e47c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-Reasoner-10B/1762652580.466471", + "retrieved_timestamp": "1762652580.466471", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/LwQ-Reasoner-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/LwQ-Reasoner-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29413400887423147 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5866254169962443 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40785416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41472739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json b/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json new file mode 100644 index 000000000..0950d8ec9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Opus-14B-Exp/1762652580.466739", + "retrieved_timestamp": "1762652580.466739", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Magellanic-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Magellanic-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6866347956754744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6382505935140227 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37990936555891236 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49262500000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5272606382978723 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json new file mode 100644 index 000000000..9a7088383 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp.v2/1762652580.467396", + "retrieved_timestamp": "1762652580.4673972", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48704991644392437 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.632146083740281 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2590634441087613 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.449 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48096742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json new file mode 100644 index 000000000..855553925 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp/1762652580.46718", + "retrieved_timestamp": "1762652580.46718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Corpus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Corpus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49826571275327247 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6355171004470184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3429003021148036 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4766875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5260139627659575 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json new file mode 100644 index 000000000..c3d93f005 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.0/1762652580.467613", + "retrieved_timestamp": "1762652580.467613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Opus-14B-2.0", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Opus-14B-2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6693739278447852 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6870557211788685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27794561933534745 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5170378989361702 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json new file mode 100644 index 000000000..faf034046 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.1/1762652580.4678242", + "retrieved_timestamp": "1762652580.467825", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Opus-14B-2.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Opus-14B-2.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02455484780382718 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6726960005125086 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2998489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49275 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173703457446809 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 14.66 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json new file mode 100644 index 000000000..6926b329f --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Exp/1762652580.46803", + "retrieved_timestamp": "1762652580.468031", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4979410187192206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6516090109599467 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48865625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5400598404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json new file mode 100644 index 000000000..0a23a8ab0 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Stock/1762652580.468238", + "retrieved_timestamp": "1762652580.468238", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Opus-14B-Stock", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Opus-14B-Stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5173750094194515 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6411753580495262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33459214501510576 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.375 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48202083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5293384308510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json new file mode 100644 index 000000000..1653a0695 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-7B-Exp/1762652580.468447", + "retrieved_timestamp": "1762652580.468448", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Megatron-Opus-7B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Megatron-Opus-7B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6017300761978217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367154102661396 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1971299093655589 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3900432180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json b/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json new file mode 100644 index 000000000..2e3d9232b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Messier-Opus-14B-Elite7/1762652580.4686568", + "retrieved_timestamp": "1762652580.468658", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Messier-Opus-14B-Elite7", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Messier-Opus-14B-Elite7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7113392465325337 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6498611961862557 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4070996978851964 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4885625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5403922872340425 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json new file mode 100644 index 000000000..de26d865b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner-Merged/1762652580.468864", + "retrieved_timestamp": "1762652580.468864", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Omni-Reasoner-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Omni-Reasoner-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4599473840520929 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5507848245879011 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3330815709969788 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4616458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43641954787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json new file mode 100644 index 000000000..9a5bf5ddd --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner3-Merged/1762652580.46908", + "retrieved_timestamp": "1762652580.4690812", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Omni-Reasoner3-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Omni-Reasoner3-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.493469549683728 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387847138827546 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10876132930513595 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35222916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29496343085106386 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json b/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json new file mode 100644 index 000000000..15c14033e --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Pegasus-Opus-14B-Exp/1762652580.469298", + "retrieved_timestamp": "1762652580.4692988", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Pegasus-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Pegasus-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6981752860188744 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6547548394062034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4859583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5412234042553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json b/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json new file mode 100644 index 000000000..1ab31f367 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Porpoise-Opus-14B-Exp/1762652580.47141", + "retrieved_timestamp": "1762652580.471411", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Porpoise-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Porpoise-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7098155117310957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6518903547146537 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4925625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5396442819148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json new file mode 100644 index 000000000..d5019e015 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v1/1762652580.4716318", + "retrieved_timestamp": "1762652580.471633", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Primal-Opus-14B-Optimus-v1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5013131823561483 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6419423743359406 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.338368580060423 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48471875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5259308510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json new file mode 100644 index 000000000..41cda8097 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v2/1762652580.471854", + "retrieved_timestamp": "1762652580.471854", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Primal-Opus-14B-Optimus-v2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6403730989330532 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6543780845512958 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206948640483384 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48998958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.542220744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json new file mode 100644 index 000000000..c94ffd02c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-14B-Conversational/1762652580.472128", + "retrieved_timestamp": "1762652580.472129", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-LCoT-14B-Conversational", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-LCoT-14B-Conversational" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4047427492386867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6239828933798323 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3498322147651007 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48471875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.527842420212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json new file mode 100644 index 000000000..57f017db8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-3B-Instruct/1762652580.47235", + "retrieved_timestamp": "1762652580.472351", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-LCoT-3B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-LCoT-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4354424039326764 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47629783868435643 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43579166666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3582114361702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json new file mode 100644 index 000000000..a45b5fea3 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-7B-Instruct/1762652580.4725702", + "retrieved_timestamp": "1762652580.472571", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-LCoT-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-LCoT-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4986901421561457 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5466466326018563 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4801875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4334275265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json new file mode 100644 index 000000000..ea824d99c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT1-Merged/1762652580.47278", + "retrieved_timestamp": "1762652580.472781", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-LCoT1-Merged", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-LCoT1-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47513486438206187 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.548095531408024 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731117824773414 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46961458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4357546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json new file mode 100644 index 000000000..9d34d70f1 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT2-7B-Instruct/1762652580.473001", + "retrieved_timestamp": "1762652580.473002", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-LCoT2-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5561177675235043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5424862934133593 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4564375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4341755319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json b/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json new file mode 100644 index 000000000..f9337fd6b --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-MathOct-7B/1762652580.473228", + "retrieved_timestamp": "1762652580.4732292", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-MathOct-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-MathOct-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4684404047926169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5485512215016556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29531722054380666 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4600625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4330119680851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json new file mode 100644 index 000000000..88b0a1bf3 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/1762652580.4734771", + "retrieved_timestamp": "1762652580.473483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21939564799177294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36662076641982305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33459214501510576 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34339583333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19132313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json new file mode 100644 index 000000000..38c9999c2 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-7B-CoT/1762652580.473804", + "retrieved_timestamp": "1762652580.473805", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/QwQ-R1-Distill-7B-CoT", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3500378994401522 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438788672517715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37790624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2804188829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json new file mode 100644 index 000000000..b589297eb --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/1762652580.474298", + "retrieved_timestamp": "1762652580.474299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13968603305895025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28243669901671337 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11228390957446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json b/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json new file mode 100644 index 000000000..cbfaa55f9 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_SmolLM2-CoT-360M/1762652580.475137", + "retrieved_timestamp": "1762652580.475137", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/SmolLM2-CoT-360M", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/SmolLM2-CoT-360M" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22156877086131466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31352960121180296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02039274924471299 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23657718120805368 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1085438829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json new file mode 100644 index 000000000..98d87c87a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite5/1762652580.4753642", + "retrieved_timestamp": "1762652580.4753652", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Elite5", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Elite5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7880756393037142 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6501539892126272 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5354984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33640939597315433 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4286666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.520029920212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json new file mode 100644 index 000000000..5f58df3ea --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite6/1762652580.47572", + "retrieved_timestamp": "1762652580.475722", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Elite6", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Elite6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7226049105262924 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6487937804559186 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48859375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5389793882978723 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json new file mode 100644 index 000000000..5ddefbcf8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm1/1762652580.476064", + "retrieved_timestamp": "1762652580.476065", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Sm1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812872068334242 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.635462046379832 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5298958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.512466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json new file mode 100644 index 000000000..562e8122d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm2/1762652580.476301", + "retrieved_timestamp": "1762652580.4763021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Sm2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4272242095417935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6609367219259568 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.486404833836858 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5088125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5344913563829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json new file mode 100644 index 000000000..3982bb4cf --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm4/1762652580.476516", + "retrieved_timestamp": "1762652580.4765172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Sm4", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4346932804957513 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6612776404137711 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4879154078549849 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5191666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5300033244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json new file mode 100644 index 000000000..23196e8a6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm5/1762652580.476726", + "retrieved_timestamp": "1762652580.476726", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sombrero-Opus-14B-Sm5", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sombrero-Opus-14B-Sm5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6851609285584471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6563944936055776 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4093655589123867 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.480625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json b/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json new file mode 100644 index 000000000..289e31918 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Sqweeks-7B-Instruct/1762652580.476933", + "retrieved_timestamp": "1762652580.476934", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Sqweeks-7B-Instruct", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Sqweeks-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21579852568961466 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4666692459456812 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5143504531722054 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44760416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3133311170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json b/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json new file mode 100644 index 000000000..dbf13854d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Tadpole-Opus-14B-Exp/1762652580.477141", + "retrieved_timestamp": "1762652580.477142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Tadpole-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Tadpole-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5749522378400422 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.636858708544215 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31344410876132933 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47284375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5322473404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json b/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json new file mode 100644 index 000000000..176e9867c --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Taurus-Opus-7B/1762652580.477352", + "retrieved_timestamp": "1762652580.4773529", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Taurus-Opus-7B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Taurus-Opus-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42232831110342783 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5367364587851736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21676737160120846 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3263422818791946 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43988541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3951130319148936 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json new file mode 100644 index 000000000..e35838260 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-10B/1762652580.477568", + "retrieved_timestamp": "1762652580.477569", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Triangulum-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Triangulum-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3229353670483207 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5968023910391113 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3549848942598187 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41724999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4178025265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json new file mode 100644 index 000000000..a621fa397 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-5B/1762652580.477782", + "retrieved_timestamp": "1762652580.477782", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Triangulum-5B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Triangulum-5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283206336963701 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3124115848614622 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3445416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12234042553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 5.413 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json b/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json new file mode 100644 index 000000000..5b10da890 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-v2-10B/1762652580.478046", + "retrieved_timestamp": "1762652580.478047", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Triangulum-v2-10B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Triangulum-v2-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6705231009277606 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6064531367418446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24471299093655588 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42807291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44664228723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json b/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json new file mode 100644 index 000000000..d7e9c82f4 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Tucana-Opus-14B-r999/1762652580.47826", + "retrieved_timestamp": "1762652580.478261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Tucana-Opus-14B-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Tucana-Opus-14B-r999" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.606725710005009 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6556888858891955 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39177852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47303125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5383976063829787 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json b/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json new file mode 100644 index 000000000..38f3b857d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Tulu-MathLingo-8B/1762652580.478472", + "retrieved_timestamp": "1762652580.478473", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Tulu-MathLingo-8B", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Tulu-MathLingo-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5589402784611497 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4658807905856453 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14501510574018128 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38642708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.304438164893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json new file mode 100644 index 000000000..bb7379472 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-7B-Elite14/1762652580.4786801", + "retrieved_timestamp": "1762652580.478681", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-7B-Elite14", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-7B-Elite14" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14882844186757802 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28285388717732607 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10887632978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json new file mode 100644 index 000000000..f2b0ebca6 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.2/1762652580.4788852", + "retrieved_timestamp": "1762652580.478886", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-Hybrid-v1.2", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6735705705306365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6390749226915919 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3330815709969788 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48217708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5242686170212766 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json new file mode 100644 index 000000000..76f152645 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.3/1762652580.4790971", + "retrieved_timestamp": "1762652580.479098", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-Hybrid-v1.3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7554776880898239 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6470999423290662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33808724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4403229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5097240691489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json new file mode 100644 index 000000000..1950619ab --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-HybridMini-v1.3/1762652580.4793081", + "retrieved_timestamp": "1762652580.479309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-HybridMini-v1.3", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.610372699991578 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5365472959273401 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46299093655589124 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45048958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4351728723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json new file mode 100644 index 000000000..182cbbf37 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v0.1/1762652580.479637", + "retrieved_timestamp": "1762652580.479639", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5521460835028835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6143056870893655 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3270392749244713 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3540268456375839 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43944791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3927859042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json new file mode 100644 index 000000000..f0e142aa8 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.1/1762652580.479969", + "retrieved_timestamp": "1762652580.47997", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-v1.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-v1.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.443236168920686 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6492289468853992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5460725075528701 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523188164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json new file mode 100644 index 000000000..ea8d60b12 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.6-r999/1762652580.480214", + "retrieved_timestamp": "1762652580.480215", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-v1.6-r999", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-v1.6-r999" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4432860366050967 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6492289468853992 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5657099697885196 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219270833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.523188164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json new file mode 100644 index 000000000..4c66a818a --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.7-Vsm6/1762652580.480439", + "retrieved_timestamp": "1762652580.4804401", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-Coder-v1.7-Vsm6", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-Coder-v1.7-Vsm6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5003889679384035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6502342489348574 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4645015105740181 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39681208053691275 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5287566489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json b/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json new file mode 100644 index 000000000..33bee9596 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-OneCoder-UIGEN/1762652580.480654", + "retrieved_timestamp": "1762652580.480654", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Viper-OneCoder-UIGEN", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Viper-OneCoder-UIGEN" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4691895282295421 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6046507657311738 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3867069486404834 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3422818791946309 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45141666666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.390375664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json b/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json new file mode 100644 index 000000000..d69d4cf5d --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_Volans-Opus-14B-Exp/1762652580.480862", + "retrieved_timestamp": "1762652580.480863", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/Volans-Opus-14B-Exp", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/Volans-Opus-14B-Exp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5867675545330834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6521211711040636 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.425226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4871979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5384807180851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json b/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json new file mode 100644 index 000000000..777381a01 --- /dev/null +++ b/data/hfopenllm_v2/prithivMLmods/WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/prithivMLmods_WebMind-7B-v0.1/1762652580.481075", + "retrieved_timestamp": "1762652580.481076", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "prithivMLmods/WebMind-7B-v0.1", + "developer": "prithivMLmods", + "inference_platform": "unknown", + "id": "prithivMLmods/WebMind-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278161943642867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5433559211614739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3648036253776435 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4537395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4279421542553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json new file mode 100644 index 000000000..f0d5a659a --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1762652580.4822989", + "retrieved_timestamp": "1762652580.4822989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Oracle-14B", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Oracle-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23583203677353867 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4611577021562399 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06419939577039276 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2575503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37166666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23819813829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json b/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json new file mode 100644 index 000000000..6fa503403 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1762652580.482562", + "retrieved_timestamp": "1762652580.482562", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Oracle-14B", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Oracle-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24007854714380067 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4622299618883472 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07250755287009064 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37033333333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2378656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 13.668 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json new file mode 100644 index 000000000..b8f8a5a4c --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.4831731", + "retrieved_timestamp": "1762652580.4831731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5980830862112528 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6375080075350833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31419939577039274 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4649375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json new file mode 100644 index 000000000..0e746e807 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/1762652580.483387", + "retrieved_timestamp": "1762652580.483388", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1921678923035324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5318689754519911 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37403125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35580119680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json new file mode 100644 index 000000000..8520f8182 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.482764", + "retrieved_timestamp": "1762652580.482764", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Qwen2.5-Math-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6066259746361875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6350068875885949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724832214765101 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4757291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5330784574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json new file mode 100644 index 000000000..488958718 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.48299", + "retrieved_timestamp": "1762652580.4829912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Qwen2.5-Math-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Qwen2.5-Math-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6005310354304356 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6356492397286339 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4756666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5339095744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json b/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json new file mode 100644 index 000000000..0c44a01a6 --- /dev/null +++ b/data/hfopenllm_v2/qingy2019/Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Ultimate-14B-Instruct/1762652580.483648", + "retrieved_timestamp": "1762652580.483649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", + "developer": "qingy2019", + "inference_platform": "unknown", + "id": "qingy2019/Qwen2.5-Ultimate-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39380177927897975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5841561592804249 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2892749244712991 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4135 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4929355053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json b/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json new file mode 100644 index 000000000..8b01baf80 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/1762652580.483871", + "retrieved_timestamp": "1762652580.483871", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20136016879657087 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8269136508088061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48036253776435045 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2835570469798658 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3446354166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11128656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json b/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json new file mode 100644 index 000000000..11f33d55a --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Eyas-17B-Instruct/1762652580.484141", + "retrieved_timestamp": "1762652580.484141", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Eyas-17B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Eyas-17B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6574588757829227 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6084550080292097 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24697885196374622 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45216666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43425864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 17.431 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json b/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json new file mode 100644 index 000000000..6ca528ac6 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Falcon3-2x10B-MoE-Instruct/1762652580.484361", + "retrieved_timestamp": "1762652580.484362", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Falcon3-2x10B-MoE-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Falcon3-2x10B-MoE-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7849783020164276 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6184925726037823 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2794561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42835416666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44232047872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 18.799 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json b/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json new file mode 100644 index 000000000..d54eb875f --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion-14B-Instruct/1762652580.4845738", + "retrieved_timestamp": "1762652580.484575", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Fusion-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Fusion-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7259770741632203 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6395930812164231 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3368580060422961 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3548657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44004166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.504404920212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json b/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json new file mode 100644 index 000000000..ae9e44fec --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion2-14B-Instruct/1762652580.4848042", + "retrieved_timestamp": "1762652580.4848042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Fusion2-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Fusion2-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6064010159709571 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.611852372286455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46338541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5050698138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json b/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json new file mode 100644 index 000000000..e4b099f00 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Fusion4-14B-Instruct/1762652580.4850292", + "retrieved_timestamp": "1762652580.48503", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Fusion4-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Fusion4-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7648949232480928 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6542520469477617 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5193650265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json b/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json new file mode 100644 index 000000000..7c4acf0b8 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_OwO-14B-Instruct/1762652580.485259", + "retrieved_timestamp": "1762652580.485259", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/OwO-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/OwO-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1383119013107444 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6164807172760662 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4161631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44068749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5181183510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json b/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json new file mode 100644 index 000000000..dae4c6c16 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_QwEnlarge-16B-Instruct/1762652580.485478", + "retrieved_timestamp": "1762652580.4854789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/QwEnlarge-16B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/QwEnlarge-16B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7801821389468832 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5949341698087998 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45996978851963743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.410125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44755651595744683 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 15.871 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json b/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json new file mode 100644 index 000000000..fd7a4a01b --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_QwQ-14B-Math-v0.2/1762652580.48586", + "retrieved_timestamp": "1762652580.4858618", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/QwQ-14B-Math-v0.2", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/QwQ-14B-Math-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33909692948044523 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.573097955260854 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4811178247734139 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40209374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47997007978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json new file mode 100644 index 000000000..42d5746ee --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B-Instruct-Preview/1762652580.4865122", + "retrieved_timestamp": "1762652580.486513", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwarkstar-4B-Instruct-Preview", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwarkstar-4B-Instruct-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5324372664530114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43584381808469397 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38959374999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.250249335106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.473 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json new file mode 100644 index 000000000..25fb349a4 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B/1762652580.486229", + "retrieved_timestamp": "1762652580.4862301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwarkstar-4B", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwarkstar-4B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19941200459225966 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40149118131308104 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44283333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24251994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 4.473 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json new file mode 100644 index 000000000..dd23ddc7a --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/1762652580.487137", + "retrieved_timestamp": "1762652580.487138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4125110262991086 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3836795503038973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1578549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35800000000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22440159574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json new file mode 100644 index 000000000..ab4c7d401 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.48737", + "retrieved_timestamp": "1762652580.487371", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7704402097545624 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.646486159387426 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42900302114803623 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.348993288590604 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40209374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49659242021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json new file mode 100644 index 000000000..d5c4aa56c --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/1762652580.487701", + "retrieved_timestamp": "1762652580.4877021", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825802204816554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6293942245934432 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47583081570996977 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4114583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49933510638297873 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json new file mode 100644 index 000000000..79b7eb6d3 --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-14B-Instruct/1762652580.48806", + "retrieved_timestamp": "1762652580.488061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.6-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.6-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5810970447302047 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6394142844483001 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30513595166163143 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37919463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4569375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5285073138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json b/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json new file mode 100644 index 000000000..65752592e --- /dev/null +++ b/data/hfopenllm_v2/qingy2024/Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-Math-14B-Instruct/1762652580.488592", + "retrieved_timestamp": "1762652580.4885938", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "qingy2024/Qwen2.6-Math-14B-Instruct", + "developer": "qingy2024", + "inference_platform": "unknown", + "id": "qingy2024/Qwen2.6-Math-14B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38623186478543603 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6324437508110833 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42900302114803623 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4758541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241023936170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json b/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json new file mode 100644 index 000000000..d0cb03da0 --- /dev/null +++ b/data/hfopenllm_v2/raphgg/test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/raphgg_test-2.5-72B/1762652580.489263", + "retrieved_timestamp": "1762652580.489265", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "raphgg/test-2.5-72B", + "developer": "raphgg", + "inference_platform": "unknown", + "id": "raphgg/test-2.5-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8437047035199936 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7266099425567868 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4108761329305136 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48118750000000005 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5836934840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json b/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json new file mode 100644 index 000000000..de266d28c --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rasyosef_Mistral-NeMo-Minitron-8B-Chat/1762652580.4896698", + "retrieved_timestamp": "1762652580.489672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", + "developer": "rasyosef", + "inference_platform": "unknown", + "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4451843331249973 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47594353379058535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027190332326283987 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4304270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2403590425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 8.414 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json b/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json new file mode 100644 index 000000000..386362584 --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rasyosef_Phi-1_5-Instruct-v0.1/1762652580.4902148", + "retrieved_timestamp": "1762652580.490216", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rasyosef/Phi-1_5-Instruct-v0.1", + "developer": "rasyosef", + "inference_platform": "unknown", + "id": "rasyosef/Phi-1_5-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24022815019703275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3117898107092894 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34215625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15616688829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "PhiForCausalLM", + "params_billions": 1.415 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json new file mode 100644 index 000000000..1bcab473c --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-apo/1762652580.490494", + "retrieved_timestamp": "1762652580.490495", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rasyosef/phi-2-instruct-apo", + "developer": "rasyosef", + "inference_platform": "unknown", + "id": "rasyosef/phi-2-instruct-apo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31459194936102874 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44450964630048634 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.030211480362537766 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33421875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21550864361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json b/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json new file mode 100644 index 000000000..f96f0075d --- /dev/null +++ b/data/hfopenllm_v2/rasyosef/phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-v0.1/1762652580.490772", + "retrieved_timestamp": "1762652580.490773", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rasyosef/phi-2-instruct-v0.1", + "developer": "rasyosef", + "inference_platform": "unknown", + "id": "rasyosef/phi-2-instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3681476260765879 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47261184292654473 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22465093085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "PhiForCausalLM", + "params_billions": 2.775 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json b/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json new file mode 100644 index 000000000..0ee2b9e2f --- /dev/null +++ b/data/hfopenllm_v2/realtreetune/rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/realtreetune_rho-1b-sft-MATH/1762652580.4910588", + "retrieved_timestamp": "1762652580.49106", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "realtreetune/rho-1b-sft-MATH", + "developer": "realtreetune", + "inference_platform": "unknown", + "id": "realtreetune/rho-1b-sft-MATH" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.212101668018635 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3144153389594046 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03474320241691843 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34584375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.1 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json b/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json new file mode 100644 index 000000000..ecffb74dd --- /dev/null +++ b/data/hfopenllm_v2/redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/1762652580.493407", + "retrieved_timestamp": "1762652580.493408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", + "developer": "redrix", + "inference_platform": "unknown", + "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5359590331431713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5128840998052852 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11329305135951662 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38178124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3179853723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json b/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json new file mode 100644 index 000000000..a9d4ced15 --- /dev/null +++ b/data/hfopenllm_v2/redrix/patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/redrix_patricide-12B-Unslop-Mell/1762652580.4937751", + "retrieved_timestamp": "1762652580.4937768", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "redrix/patricide-12B-Unslop-Mell", + "developer": "redrix", + "inference_platform": "unknown", + "id": "redrix/patricide-12B-Unslop-Mell" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40739016919551235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5398666865853622 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13141993957703926 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4025833333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3570478723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json b/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json new file mode 100644 index 000000000..1d840db69 --- /dev/null +++ b/data/hfopenllm_v2/rhplus0831/maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rhplus0831_maid-yuzu-v7/1762652580.494505", + "retrieved_timestamp": "1762652580.494506", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rhplus0831/maid-yuzu-v7", + "developer": "rhplus0831", + "inference_platform": "unknown", + "id": "rhplus0831/maid-yuzu-v7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6462430794735745 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.480491692312673 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41362499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35397273936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json b/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json new file mode 100644 index 000000000..44c8c4053 --- /dev/null +++ b/data/hfopenllm_v2/rhymes-ai/Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rhymes-ai_Aria/1762652580.4949272", + "retrieved_timestamp": "1762652580.494928", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rhymes-ai/Aria", + "developer": "rhymes-ai", + "inference_platform": "unknown", + "id": "rhymes-ai/Aria" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773079872516035 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5695312446413633 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1933534743202417 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44049202127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "AriaForConditionalGeneration", + "params_billions": 25.307 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json b/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json new file mode 100644 index 000000000..220a39cca --- /dev/null +++ b/data/hfopenllm_v2/rmdhirr/Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rmdhirr_Gluon-8B/1762652580.496151", + "retrieved_timestamp": "1762652580.4961522", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rmdhirr/Gluon-8B", + "developer": "rmdhirr", + "inference_platform": "unknown", + "id": "rmdhirr/Gluon-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052848663767692 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5153305292144984 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14425981873111782 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4038854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38081781914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json new file mode 100644 index 000000000..d02caeb71 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/1762652580.499233", + "retrieved_timestamp": "1762652580.499234", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", + "developer": "rombodawg", + "inference_platform": "unknown", + "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7526551771521784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6937699482580332 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3330815709969788 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40604026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46686458333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5329122340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json new file mode 100644 index 000000000..92aa193c0 --- /dev/null +++ b/data/hfopenllm_v2/rombodawg/rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/1762652580.499815", + "retrieved_timestamp": "1762652580.499816", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", + "developer": "rombodawg", + "inference_platform": "unknown", + "id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5387571643239937 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4461693860075828 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18085106382978725 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json b/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json new file mode 100644 index 000000000..cf85dff65 --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo-70B/1762652580.500333", + "retrieved_timestamp": "1762652580.500333", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rootxhacker/Apollo-70B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "id": "rootxhacker/Apollo-70B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5098560707810831 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6804215148524603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5611782477341389 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45721476510067116 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4947708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5279255319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 70.554 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json b/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json new file mode 100644 index 000000000..c831cce6e --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo_v2-32B/1762652580.500606", + "retrieved_timestamp": "1762652580.500606", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rootxhacker/Apollo_v2-32B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "id": "rootxhacker/Apollo_v2-32B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4280486885907171 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7072274795963693 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42749244712990936 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3783557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4993854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5869348404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json b/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json new file mode 100644 index 000000000..2d8704f45 --- /dev/null +++ b/data/hfopenllm_v2/rootxhacker/apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rootxhacker_apollo-7B/1762652580.500841", + "retrieved_timestamp": "1762652580.500842", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rootxhacker/apollo-7B", + "developer": "rootxhacker", + "inference_platform": "unknown", + "id": "rootxhacker/apollo-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29533304964161755 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3636262699883149 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41312499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17478390957446807 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json b/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json new file mode 100644 index 000000000..7b65e4ae4 --- /dev/null +++ b/data/hfopenllm_v2/rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/1762652580.501065", + "retrieved_timestamp": "1762652580.501066", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", + "developer": "rsh345", + "inference_platform": "unknown", + "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3891807071902552 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5188437309746964 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07326283987915408 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3028523489932886 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46719791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30535239361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json b/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json new file mode 100644 index 000000000..3ccc1925a --- /dev/null +++ b/data/hfopenllm_v2/rubenroy/Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rubenroy_Geneva-12B-GCv2-5m/1762652580.501345", + "retrieved_timestamp": "1762652580.501346", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rubenroy/Geneva-12B-GCv2-5m", + "developer": "rubenroy", + "inference_platform": "unknown", + "id": "rubenroy/Geneva-12B-GCv2-5m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2586381911106974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5278373390214104 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08006042296072508 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3524791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3249667553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json b/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json new file mode 100644 index 000000000..3ae28ed27 --- /dev/null +++ b/data/hfopenllm_v2/rubenroy/Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rubenroy_Gilgamesh-72B/1762652580.5016088", + "retrieved_timestamp": "1762652580.5016088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rubenroy/Gilgamesh-72B", + "developer": "rubenroy", + "inference_platform": "unknown", + "id": "rubenroy/Gilgamesh-72B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8486006019583594 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7253327589560739 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46264583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5802027925531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json b/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json new file mode 100644 index 000000000..ed84cf306 --- /dev/null +++ b/data/hfopenllm_v2/rubenroy/Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rubenroy_Zurich-14B-GCv2-5m/1762652580.5018299", + "retrieved_timestamp": "1762652580.5018299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rubenroy/Zurich-14B-GCv2-5m", + "developer": "rubenroy", + "inference_platform": "unknown", + "id": "rubenroy/Zurich-14B-GCv2-5m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6163679038285084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6308359017750411 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3074018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3615771812080537 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4874479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5232712765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json b/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json new file mode 100644 index 000000000..52b2985d2 --- /dev/null +++ b/data/hfopenllm_v2/ruizhe1217/sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ruizhe1217_sft-s1-qwen-0.5b/1762652580.502058", + "retrieved_timestamp": "1762652580.502059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ruizhe1217/sft-s1-qwen-0.5b", + "developer": "ruizhe1217", + "inference_platform": "unknown", + "id": "ruizhe1217/sft-s1-qwen-0.5b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27487510915482033 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33005365550588683 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27097315436241615 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31958333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1891622340425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 0.494 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json b/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json new file mode 100644 index 000000000..e45a514d8 --- /dev/null +++ b/data/hfopenllm_v2/rwitz/go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/rwitz_go-bruins-v2/1762652580.5023239", + "retrieved_timestamp": "1762652580.502325", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "rwitz/go-bruins-v2", + "developer": "rwitz", + "inference_platform": "unknown", + "id": "rwitz/go-bruins-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40958877999264176 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37988446841089685 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06722054380664652 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2760970744680851 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json b/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json new file mode 100644 index 000000000..39021843e --- /dev/null +++ b/data/hfopenllm_v2/sabersaleh/Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-DPO/1762652580.50325", + "retrieved_timestamp": "1762652580.503252", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sabersaleh/Llama2-7B-DPO", + "developer": "sabersaleh", + "inference_platform": "unknown", + "id": "sabersaleh/Llama2-7B-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14533105493424114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3512218731420535 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4113645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16256648936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json b/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json new file mode 100644 index 000000000..9982a3065 --- /dev/null +++ b/data/hfopenllm_v2/saishf/Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saishf_Fimbulvetr-Kuro-Lotus-10.7B/1762652580.5057359", + "retrieved_timestamp": "1762652580.5057359", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", + "developer": "saishf", + "inference_platform": "unknown", + "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49394384677101205 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4342316286386943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4445104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33892952127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json b/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json new file mode 100644 index 000000000..435c4f71e --- /dev/null +++ b/data/hfopenllm_v2/saishf/Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saishf_Neural-SOVLish-Devil-8B-L3/1762652580.506007", + "retrieved_timestamp": "1762652580.506007", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saishf/Neural-SOVLish-Devil-8B-L3", + "developer": "saishf", + "inference_platform": "unknown", + "id": "saishf/Neural-SOVLish-Devil-8B-L3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41988036188424493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141802159065874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4109583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3807347074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json new file mode 100644 index 000000000..925f7e8fb --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Base_Reasoning/1762652580.5062242", + "retrieved_timestamp": "1762652580.5062249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saishshinde15/TethysAI_Base_Reasoning", + "developer": "saishshinde15", + "inference_platform": "unknown", + "id": "saishshinde15/TethysAI_Base_Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6368757119997164 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518558867290183 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31419939577039274 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4074583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3236369680851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json new file mode 100644 index 000000000..3956104f5 --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex/1762652580.5066721", + "retrieved_timestamp": "1762652580.5066729", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saishshinde15/TethysAI_Vortex", + "developer": "saishshinde15", + "inference_platform": "unknown", + "id": "saishshinde15/TethysAI_Vortex" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4297718941297978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4749261293502527 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3149546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44578125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3240525265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json new file mode 100644 index 000000000..6835a2317 --- /dev/null +++ b/data/hfopenllm_v2/saishshinde15/TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex_Reasoning/1762652580.506901", + "retrieved_timestamp": "1762652580.506902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saishshinde15/TethysAI_Vortex_Reasoning", + "developer": "saishshinde15", + "inference_platform": "unknown", + "id": "saishshinde15/TethysAI_Vortex_Reasoning" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40211970903868405 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4693805860486275 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40844791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3380984042553192 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json b/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json new file mode 100644 index 000000000..bfb525751 --- /dev/null +++ b/data/hfopenllm_v2/sakaltcommunity/novablast-preview/588d2387-29de-41bc-8233-674081948787.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sakaltcommunity_novablast-preview/1762652580.507118", + "retrieved_timestamp": "1762652580.5071192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sakaltcommunity/novablast-preview", + "developer": "sakaltcommunity", + "inference_platform": "unknown", + "id": "sakaltcommunity/novablast-preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4530279657974175 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7042765234852668 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48942598187311176 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5021145833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5915059840425532 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json b/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json new file mode 100644 index 000000000..18f1a2064 --- /dev/null +++ b/data/hfopenllm_v2/sakaltcommunity/sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sakaltcommunity_sakaltum-7b/1762652580.5073972", + "retrieved_timestamp": "1762652580.507398", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sakaltcommunity/sakaltum-7b", + "developer": "sakaltcommunity", + "inference_platform": "unknown", + "id": "sakaltcommunity/sakaltum-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2603868845773658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4575213514148995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2769281914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json new file mode 100644 index 000000000..c8d33fb12 --- /dev/null +++ b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.0/1762652580.507964", + "retrieved_timestamp": "1762652580.5079648", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saltlux/luxia-21.4b-alignment-v1.0", + "developer": "saltlux", + "inference_platform": "unknown", + "id": "saltlux/luxia-21.4b-alignment-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36929679915956326 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6373342606775594 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09743202416918428 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43284374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34034242021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json new file mode 100644 index 000000000..dbcd145ba --- /dev/null +++ b/data/hfopenllm_v2/saltlux/luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.2/1762652580.508301", + "retrieved_timestamp": "1762652580.5083032", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "saltlux/luxia-21.4b-alignment-v1.2", + "developer": "saltlux", + "inference_platform": "unknown", + "id": "saltlux/luxia-21.4b-alignment-v1.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41153694419695297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6371180708112368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4458958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34732380319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 21.421 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json b/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json new file mode 100644 index 000000000..ab42682da --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sam-paech_Darkest-muse-v1/1762652580.508588", + "retrieved_timestamp": "1762652580.508589", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sam-paech/Darkest-muse-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "id": "sam-paech/Darkest-muse-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7344202272193336 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5968439530708949 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21450151057401812 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34395973154362414 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4502083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183843085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json b/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json new file mode 100644 index 000000000..0113f7f92 --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sam-paech_Delirium-v1/1762652580.508875", + "retrieved_timestamp": "1762652580.508876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sam-paech/Delirium-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "id": "sam-paech/Delirium-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7207564816908026 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5962113834521733 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2107250755287009 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45144791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4189660904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json b/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json new file mode 100644 index 000000000..16b15becb --- /dev/null +++ b/data/hfopenllm_v2/sam-paech/Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sam-paech_Quill-v1/1762652580.5091672", + "retrieved_timestamp": "1762652580.5091681", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sam-paech/Quill-v1", + "developer": "sam-paech", + "inference_platform": "unknown", + "id": "sam-paech/Quill-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.712213593265868 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5969226347989487 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2122356495468278 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33976510067114096 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45547916666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4171376329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json b/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json new file mode 100644 index 000000000..4b674c410 --- /dev/null +++ b/data/hfopenllm_v2/sarvamai/OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sarvamai_OpenHathi-7B-Hi-v0.1-Base/1762652580.509491", + "retrieved_timestamp": "1762652580.5094929", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", + "developer": "sarvamai", + "inference_platform": "unknown", + "id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18040244329490196 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33540458231510667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36584375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15433843085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.87 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json b/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json new file mode 100644 index 000000000..338a4b297 --- /dev/null +++ b/data/hfopenllm_v2/schnapss/testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/schnapss_testmerge-7b/1762652580.509877", + "retrieved_timestamp": "1762652580.509878", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "schnapss/testmerge-7b", + "developer": "schnapss", + "inference_platform": "unknown", + "id": "schnapss/testmerge-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39222817679313116 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5187478405637375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06873111782477341 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4685625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30601728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json b/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json new file mode 100644 index 000000000..865b14fd3 --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/1762652580.510147", + "retrieved_timestamp": "1762652580.510148", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4335186194851882 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32727821561411724 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38999999999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2362034574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 14.483 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json b/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json new file mode 100644 index 000000000..5b084dfc7 --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/1762652580.510415", + "retrieved_timestamp": "1762652580.510418", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5020623057930734 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5502038722383045 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14803625377643503 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40730208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39852061170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.642 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json b/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json new file mode 100644 index 000000000..717e87276 --- /dev/null +++ b/data/hfopenllm_v2/sci-m-wang/deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/1762652580.5106509", + "retrieved_timestamp": "1762652580.5106518", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", + "developer": "sci-m-wang", + "inference_platform": "unknown", + "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4035935761557113 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37177200995276305 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25671140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4173125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22091090425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/scripts/create_data.py b/data/hfopenllm_v2/scripts/create_data.py new file mode 100644 index 000000000..3be261d98 --- /dev/null +++ b/data/hfopenllm_v2/scripts/create_data.py @@ -0,0 +1,306 @@ +import json +import time +import uuid +from pathlib import Path +from typing import Dict, List, Any, Optional + + +def evaluation_description(evaluation_name: str) -> str: + if evaluation_name == "MATH Level 5": + return "Exact Match on MATH Level 5" + return f"Accuracy on {evaluation_name}" + + +def extract_company_name(model_name: str) -> Optional[str]: + """ + Extract company name from model name based on known patterns. + Only applies to original models, not finetuned versions. + + Args: + model_name: Full model name (e.g., "meta-llama/Llama-3-8B") + + Returns: + Company name if recognized pattern found and it's an original model, else None + """ + # Company to model name patterns mapping + company_patterns = { + "meta": ["llama"], + "google": ["gemini", "gemma"], + "openai": ["gpt"], + "anthropic": ["claude"], + "alibaba": ["qwen"], + "microsoft": ["phi"], + "mistral": ["mistral"], + } + + model_name_lower = model_name.lower() + + # Check if this is a finetuned model (contains typical finetune indicators) + finetune_indicators = [ + "-dpo", + "-sft", + "-instruct", + "-chat", + "-rlhf", + "-tune", + "finetuned", + "ft-", + ] + + # If it has a slash, check if the part after slash contains finetune indicators + if "/" in model_name: + model_part = model_name.split("/", 1)[1].lower() + # Check for finetune indicators, but exclude the base model names themselves + for indicator in finetune_indicators: + if indicator in model_part: + # Check if it's not part of an official model name + is_official_variant = any( + pattern in model_part + for patterns in company_patterns.values() + for pattern in patterns + ) + # If it has finetune indicators and looks like a third-party finetune, return None + if not is_official_variant or any( + char.isdigit() and indicator in model_part for char in model_part + ): + return None + + # Check for company patterns in the model name + for company, patterns in company_patterns.items(): + for pattern in patterns: + if pattern in model_name_lower: + return company + + return None + + +def convert_to_evalhub_format(input_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Convert a model evaluation dict to evalHub format. + + Args: + input_data: Dict containing model and evaluation information + + Returns: + Dict in evalHub format + """ + model_name = input_data["model"]["name"] + + # Create evaluation results list + evaluation_results = [] + + # Map evaluations to the new format + evaluation_mapping = { + "ifeval": "IFEval", + "bbh": "BBH", + "math": "MATH Level 5", + "gpqa": "GPQA", + "musr": "MUSR", + "mmlu_pro": "MMLU-PRO", + } + + for eval_key, eval_data in input_data.get("evaluations", {}).items(): + evaluation_result = { + "evaluation_name": eval_data.get( + "name", evaluation_mapping.get(eval_key, eval_key) + ), + "metric_config": { + "evaluation_description": evaluation_description( + eval_data.get("name", eval_key) + ), + "lower_is_better": False, + "score_type": "continuous", + "min_score": 0, + "max_score": 1, + }, + "score_details": {"score": eval_data.get("value", 0.0)}, + } + evaluation_results.append(evaluation_result) + + # Create additional_details dict + additional_details = {} + if "precision" in input_data["model"]: + additional_details["precision"] = input_data["model"]["precision"] + if "architecture" in input_data["model"]: + additional_details["architecture"] = input_data["model"]["architecture"] + if "params_billions" in input_data.get("metadata", {}): + additional_details["params_billions"] = input_data["metadata"][ + "params_billions" + ] + + # Extract developer name from model name + # First, try to extract company name if it's an original model + company_name = extract_company_name(model_name) + + if company_name: + developer = company_name + elif "/" in model_name: + developer = model_name.split("/")[0] + else: + developer = "Unknown" + + # Create the evalHub format + output_data = { + "schema_version": "0.0.1", + "evaluation_id": f"hfopenllm_v2/{model_name.replace('/', '_')}/{time.time()}", + "retrieved_timestamp": str(time.time()), + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard", + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party", + }, + "model_info": { + "name": model_name, + "developer": developer, + "inference_platform": "unknown", + "id": f"{developer}/{model_name}" if "/" not in model_name else model_name, + }, + "evaluation_results": evaluation_results, + } + + # Add additional_details only if it has content + if additional_details: + output_data["additional_details"] = additional_details + + return output_data + + +def process_models( + models_data: List[Dict[str, Any]], output_dir: str = "/Users/random/every_eval_ever/data/hfopenllm_v2" +): + """ + Process a list of model evaluation dicts and save them in evalHub format. + Follows the structure: {leaderboard_name}/{developer_name}/{model_name}/{uuid}.json + + Args: + models_data: List of dicts containing model and evaluation information + output_dir: Base directory (should be the leaderboard name folder) + """ + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + for model_data in models_data: + try: + # Convert to evalHub format + converted_data = convert_to_evalhub_format(model_data) + + # Get model name and parse developer/model + model_name = model_data["model"]["name"] + + # Extract developer (will use company name if applicable) + company_name = extract_company_name(model_name) + + if company_name: + developer = company_name + model = model_name # Keep full model name + elif "/" in model_name: + developer, model = model_name.split("/", 1) + else: + developer = "Unknown" + model = model_name + + # Create folder structure: {leaderboard}/{developer}/{model}/ + model_dir = output_path / developer / model + model_dir.mkdir(parents=True, exist_ok=True) + + # Generate UUID for the filename + file_uuid = str(uuid.uuid4()) + output_file = model_dir / f"{file_uuid}.json" + + # Save to file + with open(output_file, "w") as f: + json.dump(converted_data, f, indent=2) + + print(f"✓ Converted: {model_name} -> {output_file}") + + except Exception as e: + print( + f"✗ Error processing {model_data.get('model', {}).get('name', 'unknown')}: {e}" + ) + + +# Example usage +if __name__ == "__main__": + # Example: Single model conversion + example_model = { + "id": "0-hero/Matter-0.2-7B-DPO_bfloat16_26a66f0d862e2024ce4ad0a09c37052ac36e8af6_True", + "model": { + "name": "0-hero/Matter-0.2-7B-DPO", + "sha": "26a66f0d862e2024ce4ad0a09c37052ac36e8af6", + "precision": "bfloat16", + "type": "chatmodels", + "weight_type": "Original", + "architecture": "MistralForCausalLM", + "average_score": 8.90636130175029, + "has_chat_template": True, + }, + "evaluations": { + "ifeval": { + "name": "IFEval", + "value": 0.3302792147058693, + "normalized_score": 33.02792147058693, + }, + "bbh": { + "name": "BBH", + "value": 0.3596254301656297, + "normalized_score": 10.055525080241035, + }, + "math": { + "name": "MATH Level 5", + "value": 0.014350453172205438, + "normalized_score": 1.4350453172205437, + }, + "gpqa": { + "name": "GPQA", + "value": 0.25922818791946306, + "normalized_score": 1.230425055928408, + }, + "musr": { + "name": "MUSR", + "value": 0.381375, + "normalized_score": 5.871874999999999, + }, + "mmlu_pro": { + "name": "MMLU-PRO", + "value": 0.1163563829787234, + "normalized_score": 1.8173758865248217, + }, + }, + "features": { + "is_not_available_on_hub": True, + "is_merged": False, + "is_moe": False, + "is_flagged": False, + "is_official_provider": False, + }, + "metadata": { + "upload_date": "2024-04-13", + "submission_date": "2024-08-05", + "generation": 0, + "base_model": "0-hero/Matter-0.2-7B-DPO", + "hub_license": "apache-2.0", + "hub_hearts": 3, + "params_billions": 7.242, + "co2_cost": 1.219174164123715, + }, + } + + # Process single model + data_path = "/Users/random/every_eval_ever/data/formatted" + all_models = [] + with open(data_path, "r") as f: + all_models = json.load(f) + + process_models(all_models) + # process_models([example_model]) + + # Or load from a JSON file containing a list of models: + # with open('models_data.json', 'r') as f: + # models_list = json.load(f) + # process_models(models_list) diff --git a/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json b/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json new file mode 100644 index 000000000..5ce3a919e --- /dev/null +++ b/data/hfopenllm_v2/senseable/WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/senseable_WestLake-7B-v2/1762652580.511263", + "retrieved_timestamp": "1762652580.511264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "senseable/WestLake-7B-v2", + "developer": "senseable", + "inference_platform": "unknown", + "id": "senseable/WestLake-7B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4418620371724801 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4073276290688943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39371874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27642952127659576 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json new file mode 100644 index 000000000..05d1c135b --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/1762652580.512954", + "retrieved_timestamp": "1762652580.512954", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6967014189018471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.510381184158217 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39657291666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35289228723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json new file mode 100644 index 000000000..a074180c1 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/1762652580.513202", + "retrieved_timestamp": "1762652580.513203", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6099981382731153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49642264289263355 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3789895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35106382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json b/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json new file mode 100644 index 000000000..0f619db72 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llamaverse-3.1-8B-Instruct/1762652580.513652", + "retrieved_timestamp": "1762652580.513653", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/Llamaverse-3.1-8B-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "id": "sethuiyer/Llamaverse-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6185410266980501 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5414159562743479 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3761666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3523105053191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json b/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json new file mode 100644 index 000000000..fdf3eb3a0 --- /dev/null +++ b/data/hfopenllm_v2/sethuiyer/Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sethuiyer_Llamazing-3.1-8B-Instruct/1762652580.513854", + "retrieved_timestamp": "1762652580.513855", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sethuiyer/Llamazing-3.1-8B-Instruct", + "developer": "sethuiyer", + "inference_platform": "unknown", + "id": "sethuiyer/Llamazing-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5711301568726534 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.529106967510303 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.054380664652567974 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39759374999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3606216755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json b/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json new file mode 100644 index 000000000..80c8478e4 --- /dev/null +++ b/data/hfopenllm_v2/shadowml/BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shadowml_BeagSake-7B/1762652580.514317", + "retrieved_timestamp": "1762652580.514318", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shadowml/BeagSake-7B", + "developer": "shadowml", + "inference_platform": "unknown", + "id": "shadowml/BeagSake-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215960318621258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47110342371098474 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05060422960725076 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41235416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25847739361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json b/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json new file mode 100644 index 000000000..383b0fc4b --- /dev/null +++ b/data/hfopenllm_v2/shadowml/Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shadowml_Mixolar-4x7b/1762652580.5145578", + "retrieved_timestamp": "1762652580.514559", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shadowml/Mixolar-4x7b", + "developer": "shadowml", + "inference_platform": "unknown", + "id": "shadowml/Mixolar-4x7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3893303102434873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5215949876221495 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053523936170215 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 36.099 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json b/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json new file mode 100644 index 000000000..201c89478 --- /dev/null +++ b/data/hfopenllm_v2/shastraai/Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/1762652580.5147672", + "retrieved_timestamp": "1762652580.5147672", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", + "developer": "shastraai", + "inference_platform": "unknown", + "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3041507644161935 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.384316753625765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3604479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19971742021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json b/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json new file mode 100644 index 000000000..08b751d54 --- /dev/null +++ b/data/hfopenllm_v2/shivam9980/NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shivam9980_NEPALI-LLM/1762652580.51522", + "retrieved_timestamp": "1762652580.5152209", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shivam9980/NEPALI-LLM", + "developer": "shivam9980", + "inference_platform": "unknown", + "id": "shivam9980/NEPALI-LLM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.041666112581284324 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3828457133787513 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41219791666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2064494680851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.273 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json b/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json new file mode 100644 index 000000000..fd14890dd --- /dev/null +++ b/data/hfopenllm_v2/shuttleai/shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shuttleai_shuttle-3/1762652580.5160902", + "retrieved_timestamp": "1762652580.5160909", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shuttleai/shuttle-3", + "developer": "shuttleai", + "inference_platform": "unknown", + "id": "shuttleai/shuttle-3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.815403130360776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7420334281529087 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45996978851963743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41191275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4376875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5716422872340425 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json b/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json new file mode 100644 index 000000000..d9ac77caf --- /dev/null +++ b/data/hfopenllm_v2/shyamieee/Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/shyamieee_Padma-v7.0/1762652580.51635", + "retrieved_timestamp": "1762652580.51635", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "shyamieee/Padma-v7.0", + "developer": "shyamieee", + "inference_platform": "unknown", + "id": "shyamieee/Padma-v7.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3841097177710696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5118785631761485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2860738255033557 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43855208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3029421542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json b/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json new file mode 100644 index 000000000..e36b434d1 --- /dev/null +++ b/data/hfopenllm_v2/silma-ai/SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-9B-Instruct-v1.0/1762652580.516612", + "retrieved_timestamp": "1762652580.516613", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "silma-ai/SILMA-9B-Instruct-v1.0", + "developer": "silma-ai", + "inference_platform": "unknown", + "id": "silma-ai/SILMA-9B-Instruct-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5841943820174914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5219015032853501 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163141993957704 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46369791666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39195478723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json b/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json new file mode 100644 index 000000000..ae1856c0f --- /dev/null +++ b/data/hfopenllm_v2/silma-ai/SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/1762652580.516862", + "retrieved_timestamp": "1762652580.5168629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", + "developer": "silma-ai", + "inference_platform": "unknown", + "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11807781131841291 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37932201246317715 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2701342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4042604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22581449468085107 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json b/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json new file mode 100644 index 000000000..9946cf72b --- /dev/null +++ b/data/hfopenllm_v2/skymizer/Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/skymizer_Llama2-7b-sft-chat-custom-template-dpo/1762652580.517826", + "retrieved_timestamp": "1762652580.517826", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", + "developer": "skymizer", + "inference_platform": "unknown", + "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2352823840742563 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36884662302661564 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44286458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19464760638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.738 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json b/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json new file mode 100644 index 000000000..a052c335c --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_ChocoTrio-14B-v1/1762652580.518315", + "retrieved_timestamp": "1762652580.518315", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/ChocoTrio-14B-v1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/ChocoTrio-14B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7088912973133508 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6505840125855428 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3972809667673716 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3850671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4820520833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5369847074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json new file mode 100644 index 000000000..4d70246c3 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-40/1762652580.518558", + "retrieved_timestamp": "1762652580.518559", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/IF-reasoning-experiment-40", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/IF-reasoning-experiment-40" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6329793835910938 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6111859401994667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3800335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5194166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5024933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json new file mode 100644 index 000000000..43190ce2f --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-80/1762652580.5187662", + "retrieved_timestamp": "1762652580.518767", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/IF-reasoning-experiment-80", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/IF-reasoning-experiment-80" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5462761029623622 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42103836132239286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28439597315436244 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5024583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3367686170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.383 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json b/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json new file mode 100644 index 000000000..ba28695d1 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_KytheraMix-7B-v0.2/1762652580.5189881", + "retrieved_timestamp": "1762652580.5189881", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/KytheraMix-7B-v0.2", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/KytheraMix-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6128705168951715 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5635202746804572 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29229607250755285 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33557046979865773 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45941666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45054853723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json new file mode 100644 index 000000000..b6beb75db --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.1-experimental/1762652580.519198", + "retrieved_timestamp": "1762652580.519199", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.1-experimental", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.1-experimental" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5353850006870658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6582539239967329 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47284375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5408078457446809 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json new file mode 100644 index 000000000..c07cd309a --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.3/1762652580.519407", + "retrieved_timestamp": "1762652580.5194082", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5031616111916382 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6611400465373158 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3406344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4688125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410571808510638 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json new file mode 100644 index 000000000..1effe6111 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/1762652580.520087", + "retrieved_timestamp": "1762652580.520087", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.669224324791553 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6143349188724702 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3776435045317221 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37416107382550334 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5180208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5054022606382979 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json new file mode 100644 index 000000000..3f783a318 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-model_stock/1762652580.520298", + "retrieved_timestamp": "1762652580.520299", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.6-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6789662539838739 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6269436532753222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4244712990936556 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.50065625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json new file mode 100644 index 000000000..4fd38ec3b --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6/1762652580.519876", + "retrieved_timestamp": "1762652580.519876", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.6", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.6" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6972510716011294 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6460312233782931 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4846875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json new file mode 100644 index 000000000..553de41ae --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-Fusion/1762652580.52051", + "retrieved_timestamp": "1762652580.520511", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.7-Fusion", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6821134589555713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6543636625652262 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4040785498489426 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.401006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49913541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390625 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json new file mode 100644 index 000000000..a087e58bc --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc1/1762652580.520714", + "retrieved_timestamp": "1762652580.520715", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.7-rc1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.7-rc1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7305482785675341 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6486027992626241 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3851963746223565 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47147916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415558510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json new file mode 100644 index 000000000..13f235edb --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc4/1762652580.520921", + "retrieved_timestamp": "1762652580.5209222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/Lamarck-14B-v0.7-rc4", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/Lamarck-14B-v0.7-rc4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7210811757248545 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6509652911243554 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4025679758308157 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38926174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4911979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5399767287234043 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json new file mode 100644 index 000000000..b476acae1 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v1/1762652580.521131", + "retrieved_timestamp": "1762652580.521132", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/LamarckInfusion-14B-v1", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/LamarckInfusion-14B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7198322672730577 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6539252513912222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4169184290030212 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39093959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48989583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5376496010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json new file mode 100644 index 000000000..eff1cb3c7 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-hi/1762652580.521555", + "retrieved_timestamp": "1762652580.521556", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/LamarckInfusion-14B-v2-hi", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/LamarckInfusion-14B-v2-hi" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.685485622592499 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6555026541798943 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3884228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48471875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5404753989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json new file mode 100644 index 000000000..63bcf2feb --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-lo/1762652580.52177", + "retrieved_timestamp": "1762652580.521771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/LamarckInfusion-14B-v2-lo", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/LamarckInfusion-14B-v2-lo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6787911630030541 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6528441920403686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42371601208459214 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3859060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4991041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5397273936170213 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json new file mode 100644 index 000000000..607c65c44 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2/1762652580.521342", + "retrieved_timestamp": "1762652580.521342", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/LamarckInfusion-14B-v2", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/LamarckInfusion-14B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6811892445378263 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6564434429766982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.438821752265861 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3875838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4992604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5416389627659575 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json new file mode 100644 index 000000000..0fa1b701b --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v3/1762652580.5219798", + "retrieved_timestamp": "1762652580.5219798", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/LamarckInfusion-14B-v3", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/LamarckInfusion-14B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7131378076836128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517667892516962 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4123867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38674496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48202083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5407247340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json new file mode 100644 index 000000000..99762f3b3 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-prose-model_stock/1762652580.5312169", + "retrieved_timestamp": "1762652580.5312169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/lamarck-14b-prose-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/lamarck-14b-prose-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4276486389446668 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6487621585665343 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3934563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48459375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.535405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json new file mode 100644 index 000000000..d1a1e57a8 --- /dev/null +++ b/data/hfopenllm_v2/sometimesanotion/lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-reason-model_stock/1762652580.531434", + "retrieved_timestamp": "1762652580.531434", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sometimesanotion/lamarck-14b-reason-model_stock", + "developer": "sometimesanotion", + "inference_platform": "unknown", + "id": "sometimesanotion/lamarck-14b-reason-model_stock" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49646715160219335 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6568898541408251 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580060422960725 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38422818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47408333333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5402260638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json new file mode 100644 index 000000000..3ac2c301c --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/1762652580.531641", + "retrieved_timestamp": "1762652580.5316422", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28933784580468713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38041816886828617 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24664429530201343 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3860625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14012632978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json new file mode 100644 index 000000000..d3eb4f0a4 --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/1762652580.531905", + "retrieved_timestamp": "1762652580.5319061", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3199377651298555 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39586243698929185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4271770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21243351063829788 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json new file mode 100644 index 000000000..9c6c41941 --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1762652580.532109", + "retrieved_timestamp": "1762652580.53211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37644117607946914 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3828367247244511 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2651006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4404166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20553523936170212 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.723 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json new file mode 100644 index 000000000..8cbf1a75e --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/1762652580.532313", + "retrieved_timestamp": "1762652580.532314", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4275489035758454 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4197290890050172 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0256797583081571 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40863541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27086103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json new file mode 100644 index 000000000..f2af689da --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/1762652580.532533", + "retrieved_timestamp": "1762652580.5325341", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40871443325930756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4322585223071556 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38851041666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27476728723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json new file mode 100644 index 000000000..a3a6e80ca --- /dev/null +++ b/data/hfopenllm_v2/sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/1762652580.5327501", + "retrieved_timestamp": "1762652580.532751", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", + "developer": "sonthenguyen", + "inference_platform": "unknown", + "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4032190144372487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43053552565190517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2802013422818792 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42575 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2711103723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json b/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json new file mode 100644 index 000000000..935f2a9d8 --- /dev/null +++ b/data/hfopenllm_v2/sophosympatheia/Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sophosympatheia_Midnight-Miqu-70B-v1.5/1762652580.532959", + "retrieved_timestamp": "1762652580.53296", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sophosympatheia/Midnight-Miqu-70B-v1.5", + "developer": "sophosympatheia", + "inference_platform": "unknown", + "id": "sophosympatheia/Midnight-Miqu-70B-v1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6118465671086051 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5606228371685053 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42441666666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38248005319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json new file mode 100644 index 000000000..68794bbd0 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.0-Instruct/1762652580.533494", + "retrieved_timestamp": "1762652580.533494", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speakleash/Bielik-11B-v2.0-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "id": "speakleash/Bielik-11B-v2.0-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5252430218486948 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5361579931173499 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11858006042296072 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31711409395973156 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4467083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351063829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json new file mode 100644 index 000000000..2aaaf60e6 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.1-Instruct/1762652580.533698", + "retrieved_timestamp": "1762652580.533698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speakleash/Bielik-11B-v2.1-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "id": "speakleash/Bielik-11B-v2.1-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5089817240477489 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5530119844151298 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26661631419939574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4185208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34466422872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json new file mode 100644 index 000000000..12bb164c1 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.2-Instruct/1762652580.533901", + "retrieved_timestamp": "1762652580.5339022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speakleash/Bielik-11B-v2.2-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "id": "speakleash/Bielik-11B-v2.2-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5551935531057595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5596561190863629 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2681268882175227 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41712499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3486535904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json new file mode 100644 index 000000000..ab7277428 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.3-Instruct/1762652580.534104", + "retrieved_timestamp": "1762652580.534104", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speakleash/Bielik-11B-v2.3-Instruct", + "developer": "speakleash", + "inference_platform": "unknown", + "id": "speakleash/Bielik-11B-v2.3-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.558290890393046 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5662699020280031 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34060402684563756 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4518229166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34441489361702127 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json b/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json new file mode 100644 index 000000000..7920c3627 --- /dev/null +++ b/data/hfopenllm_v2/speakleash/Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2/1762652580.533211", + "retrieved_timestamp": "1762652580.533211", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "speakleash/Bielik-11B-v2", + "developer": "speakleash", + "inference_platform": "unknown", + "id": "speakleash/Bielik-11B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23810489501190177 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49308409091594996 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39244791666666673 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3137466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 11.169 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json b/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json new file mode 100644 index 000000000..12ee4d851 --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_12B_v2.0/1762652580.534569", + "retrieved_timestamp": "1762652580.53457", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spow12/ChatWaifu_12B_v2.0", + "developer": "spow12", + "inference_platform": "unknown", + "id": "spow12/ChatWaifu_12B_v2.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47675833455232114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5207681738205238 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07099697885196375 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44317708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33876329787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json b/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json new file mode 100644 index 000000000..98a0bfd1f --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_22B_v2.0_preview/1762652580.534824", + "retrieved_timestamp": "1762652580.5348248", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spow12/ChatWaifu_22B_v2.0_preview", + "developer": "spow12", + "inference_platform": "unknown", + "id": "spow12/ChatWaifu_22B_v2.0_preview" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6744947849483814 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6170153091362338 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18882175226586104 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31543624161073824 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3685416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39876994680851063 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json b/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json new file mode 100644 index 000000000..b783fbe66 --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v1.4/1762652580.535029", + "retrieved_timestamp": "1762652580.5350301", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spow12/ChatWaifu_v1.4", + "developer": "spow12", + "inference_platform": "unknown", + "id": "spow12/ChatWaifu_v1.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5690567693719332 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5176247229970669 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47433333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474900265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json new file mode 100644 index 000000000..63c9e5e4b --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.535436", + "retrieved_timestamp": "1762652580.535437", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spow12/ChatWaifu_v2.0_22B", + "developer": "spow12", + "inference_platform": "unknown", + "id": "spow12/ChatWaifu_v2.0_22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6517384982956334 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5908050619550995 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20317220543806647 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3841979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3812333776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json new file mode 100644 index 000000000..45e00bcca --- /dev/null +++ b/data/hfopenllm_v2/spow12/ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.5352252", + "retrieved_timestamp": "1762652580.535226", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "spow12/ChatWaifu_v2.0_22B", + "developer": "spow12", + "inference_platform": "unknown", + "id": "spow12/ChatWaifu_v2.0_22B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6510891102275296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.592630190761292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18580060422960726 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32466442953020136 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3841979166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3835605053191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 22.247 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json b/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json new file mode 100644 index 000000000..907a994b0 --- /dev/null +++ b/data/hfopenllm_v2/ssmits/Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ssmits_Qwen2.5-95B-Instruct/1762652580.535626", + "retrieved_timestamp": "1762652580.5356271", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ssmits/Qwen2.5-95B-Instruct", + "developer": "ssmits", + "inference_platform": "unknown", + "id": "ssmits/Qwen2.5-95B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8431051831363006 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7037799697488242 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5302114803625377 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3640939597315436 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4283854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5216921542553191 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 94.648 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json b/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json new file mode 100644 index 000000000..d16ac4851 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_StableBeluga2/1762652580.535889", + "retrieved_timestamp": "1762652580.5358899", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/StableBeluga2", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/StableBeluga2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37871403431783224 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5824128134553807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04380664652567976 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3162751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3326130319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 68.977 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json b/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json new file mode 100644 index 000000000..b05d1b597 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b-chat/1762652580.536706", + "retrieved_timestamp": "1762652580.5367072", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-2-12b-chat", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-2-12b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4081647805600252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4672024731282805 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05362537764350453 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2734375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 12.143 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json b/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json new file mode 100644 index 000000000..d1d54894f --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b/1762652580.536407", + "retrieved_timestamp": "1762652580.536408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-2-12b", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-2-12b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1569214129620518 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4508654171114765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2785234899328859 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44788541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3071808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 12.143 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json new file mode 100644 index 000000000..df070cbf7 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b-chat/1762652580.5372329", + "retrieved_timestamp": "1762652580.5372338", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-2-1_6b-chat", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-2-1_6b-chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30599919325168334 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3390172395486522 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.024924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35796875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16215093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json new file mode 100644 index 000000000..8939f8e67 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b/1762652580.5369868", + "retrieved_timestamp": "1762652580.536989", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-2-1_6b", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-2-1_6b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11570521771122844 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.338457720511071 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38819791666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1463597074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json b/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json new file mode 100644 index 000000000..884f6ab74 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-zephyr-1_6b/1762652580.537471", + "retrieved_timestamp": "1762652580.537472", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-2-zephyr-1_6b", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-2-zephyr-1_6b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32793100085550786 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3351608706280727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3511458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17137632978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "StableLmForCausalLM", + "params_billions": 1.645 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json b/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json new file mode 100644 index 000000000..397312a11 --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-3b-4e1t/1762652580.5377111", + "retrieved_timestamp": "1762652580.537712", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-3b-4e1t", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-3b-4e1t" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22031986240951784 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3504211415826912 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23741610738255034 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37778124999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1668882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json b/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json new file mode 100644 index 000000000..8b49ac6ac --- /dev/null +++ b/data/hfopenllm_v2/stabilityai/stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-zephyr-3b/1762652580.537945", + "retrieved_timestamp": "1762652580.5379462", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stabilityai/stablelm-zephyr-3b", + "developer": "stabilityai", + "inference_platform": "unknown", + "id": "stabilityai/stablelm-zephyr-3b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36832271705740766 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3866361442837871 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23909395973154363 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4183020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17677859042553193 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "StableLmForCausalLM", + "params_billions": 2.795 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json new file mode 100644 index 000000000..d9ee337ab --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0130/1762652580.540879", + "retrieved_timestamp": "1762652580.54088", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno-com/miscii-14b-0130", + "developer": "sthenno-com", + "inference_platform": "unknown", + "id": "sthenno-com/miscii-14b-0130" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6647029880716498 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6505409113818335 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43202416918429004 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38171140939597314 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4911666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5363198138297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json new file mode 100644 index 000000000..49e44adec --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0218/1762652580.541173", + "retrieved_timestamp": "1762652580.541174", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno-com/miscii-14b-0218", + "developer": "sthenno-com", + "inference_platform": "unknown", + "id": "sthenno-com/miscii-14b-0218" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7655941790006073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6558708629267258 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5143504531722054 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38338926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4272708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5297539893617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json new file mode 100644 index 000000000..87c6d1f58 --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1028/1762652580.541399", + "retrieved_timestamp": "1762652580.5414", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno-com/miscii-14b-1028", + "developer": "sthenno-com", + "inference_platform": "unknown", + "id": "sthenno-com/miscii-14b-1028" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8236711924360696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.64483340535341 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5030211480362538 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41815625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5152925531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json b/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json new file mode 100644 index 000000000..4e3b240df --- /dev/null +++ b/data/hfopenllm_v2/sthenno-com/miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1225/1762652580.541638", + "retrieved_timestamp": "1762652580.5416389", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno-com/miscii-14b-1225", + "developer": "sthenno-com", + "inference_platform": "unknown", + "id": "sthenno-com/miscii-14b-1225" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.787800812954073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6571708988407374 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4365729166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5271775265957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json b/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json new file mode 100644 index 000000000..7ef4a1ea8 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-0120/1762652580.538178", + "retrieved_timestamp": "1762652580.5381792", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-0120", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-0120" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5390319906736348 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6373174111347703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33534743202416917 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39429530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46332291666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5290059840425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json b/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json new file mode 100644 index 000000000..9040a89fe --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-fusion-0309/1762652580.538481", + "retrieved_timestamp": "1762652580.538483", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-fusion-0309", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-fusion-0309" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7691913013027656 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6580880569586895 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47658610271903323 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5258477393617021 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json b/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json new file mode 100644 index 000000000..8124f6b2a --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-kto-0205-ckpt80/1762652580.5387661", + "retrieved_timestamp": "1762652580.538767", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-kto-0205-ckpt80", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-kto-0205-ckpt80" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8054362425032248 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.654273895095419 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34815436241610737 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4247604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5285904255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json new file mode 100644 index 000000000..cbfb09098 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-001/1762652580.5390232", + "retrieved_timestamp": "1762652580.5390239", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-nuslerp-001", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-nuslerp-001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7926468437080281 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6577675676172494 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47583081570996977 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5256815159574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json new file mode 100644 index 000000000..67d6c3139 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-0124/1762652580.539254", + "retrieved_timestamp": "1762652580.5392551", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-nuslerp-0124", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-nuslerp-0124" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7003982765728267 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6468547741903091 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.411631419939577 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3901006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48592708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5352393617021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json b/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json new file mode 100644 index 000000000..81f0aa71f --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-ppo-ckpt40/1762652580.539634", + "retrieved_timestamp": "1762652580.539635", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-ppo-ckpt40", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-ppo-ckpt40" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7923221496739761 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6549600322869433 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4735649546827795 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3775167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4351770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5291722074468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json new file mode 100644 index 000000000..865ca0a18 --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0309-ckpt10/1762652580.539892", + "retrieved_timestamp": "1762652580.539893", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-sft-0309-ckpt10", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-sft-0309-ckpt10" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7743620260907724 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6551647758995857 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47205438066465255 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716442953020134 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4364166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5257646276595744 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json new file mode 100644 index 000000000..916ac5c5c --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0314-stage1-ckpt50/1762652580.540305", + "retrieved_timestamp": "1762652580.540307", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7393659933421101 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6601015847983588 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46827794561933533 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3733221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44286458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301695478723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json b/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json new file mode 100644 index 000000000..926380e5d --- /dev/null +++ b/data/hfopenllm_v2/sthenno/tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sthenno_tempestissimo-14b-0309/1762652580.540641", + "retrieved_timestamp": "1762652580.540643", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sthenno/tempestissimo-14b-0309", + "developer": "sthenno", + "inference_platform": "unknown", + "id": "sthenno/tempestissimo-14b-0309" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7548781677061308 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6587329699954757 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.479607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36661073825503354 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43123958333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.528091755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json b/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json new file mode 100644 index 000000000..3118d17ae --- /dev/null +++ b/data/hfopenllm_v2/streamerbtw1002/Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/streamerbtw1002_Nexuim-R1-7B-Instruct/1762652580.541884", + "retrieved_timestamp": "1762652580.541885", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "streamerbtw1002/Nexuim-R1-7B-Instruct", + "developer": "streamerbtw1002", + "inference_platform": "unknown", + "id": "streamerbtw1002/Nexuim-R1-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6934289906337407 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5175174748142363 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44561933534743203 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33555208333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.413813164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json b/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json new file mode 100644 index 000000000..684fa899e --- /dev/null +++ b/data/hfopenllm_v2/stupidity-ai/Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/1762652580.5421681", + "retrieved_timestamp": "1762652580.542169", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", + "developer": "stupidity-ai", + "inference_platform": "unknown", + "id": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23181048506850713 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2822965317600308 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3485416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.109375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json new file mode 100644 index 000000000..13b8dc9db --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.1/1762652580.542475", + "retrieved_timestamp": "1762652580.5424771", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Clarus-7B-v0.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Clarus-7B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7454110648634512 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5496611433440965 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44295833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4387466755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json new file mode 100644 index 000000000..c9da529bc --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.2/1762652580.542793", + "retrieved_timestamp": "1762652580.542794", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Clarus-7B-v0.2", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Clarus-7B-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7679423928509688 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5490057426751466 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48564954682779454 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44165625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399933510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.613 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json new file mode 100644 index 000000000..1938461c0 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.3/1762652580.543006", + "retrieved_timestamp": "1762652580.543007", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Clarus-7B-v0.3", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Clarus-7B-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7509064836855099 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5525985716155296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4879154078549849 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44022916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4384973404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json b/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json new file mode 100644 index 000000000..327153093 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/1762652580.543463", + "retrieved_timestamp": "1762652580.543463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7676176988169169 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5590927389495824 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48121875000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.406000664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json b/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json new file mode 100644 index 000000000..549eeb079 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_HomerCreativeAnvita-Mix-Qw7B/1762652580.543669", + "retrieved_timestamp": "1762652580.54367", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7807816593305763 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5564653181490319 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44159375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4444813829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json b/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json new file mode 100644 index 000000000..d5d366a9b --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Lamarckvergence-14B/1762652580.544092", + "retrieved_timestamp": "1762652580.544093", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Lamarckvergence-14B", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Lamarckvergence-14B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7655941790006073 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.651698573892736 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5400302114803626 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36325503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44215625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5283410904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json b/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json new file mode 100644 index 000000000..3d63f2e3f --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Lix-14B-v0.1/1762652580.5443048", + "retrieved_timestamp": "1762652580.5443058", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Lix-14B-v0.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Lix-14B-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7813313120298586 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6607910825152539 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294561933534743 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3699664429530201 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43378125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5314162234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json b/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json new file mode 100644 index 000000000..b280fd7c3 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Maestro-10B/1762652580.5447612", + "retrieved_timestamp": "1762652580.5447621", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Maestro-10B", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Maestro-10B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7767601076255447 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5746090622656775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19108761329305135 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43972916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42179188829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json b/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json new file mode 100644 index 000000000..74c83f539 --- /dev/null +++ b/data/hfopenllm_v2/suayptalha/Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/suayptalha_Rombos-2.5-T.E-8.1/1762652580.544959", + "retrieved_timestamp": "1762652580.544959", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "suayptalha/Rombos-2.5-T.E-8.1", + "developer": "suayptalha", + "inference_platform": "unknown", + "id": "suayptalha/Rombos-2.5-T.E-8.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6925047762159957 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5514641249478369 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49244712990936557 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41663541666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4445644946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json b/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json new file mode 100644 index 000000000..8c6d07730 --- /dev/null +++ b/data/hfopenllm_v2/sumink/Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_Qmerft/1762652580.5451572", + "retrieved_timestamp": "1762652580.5451572", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/Qmerft", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/Qmerft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15639724819035714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29390930175643937 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0022658610271903325 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36876041666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11569148936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.777 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json b/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json new file mode 100644 index 000000000..b1d386585 --- /dev/null +++ b/data/hfopenllm_v2/sumink/llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_llftfl7/1762652580.548197", + "retrieved_timestamp": "1762652580.548198", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/llftfl7", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/llftfl7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17143512546709397 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37864273336631166 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.010574018126888218 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36320833333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17428523936170212 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json b/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json new file mode 100644 index 000000000..09ee4fe8c --- /dev/null +++ b/data/hfopenllm_v2/sumink/llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_llmer/1762652580.548394", + "retrieved_timestamp": "1762652580.548395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/llmer", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/llmer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3191132860809319 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4884590875207178 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0649546827794562 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4039166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35289228723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json b/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json new file mode 100644 index 000000000..8c73496fb --- /dev/null +++ b/data/hfopenllm_v2/sumink/qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_qwft/1762652580.548597", + "retrieved_timestamp": "1762652580.548597", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/qwft", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/qwft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11965252197502627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30021752093452153 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3580625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11294880319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json b/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json new file mode 100644 index 000000000..3023c1b95 --- /dev/null +++ b/data/hfopenllm_v2/sumink/qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_qwmer/1762652580.54879", + "retrieved_timestamp": "1762652580.548791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/qwmer", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/qwmer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22124407682726277 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4298800979582788 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0007552870090634441 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28691275167785235 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4031770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22149268617021275 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json b/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json new file mode 100644 index 000000000..16184f323 --- /dev/null +++ b/data/hfopenllm_v2/sumink/solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_solarmer3/1762652580.5489879", + "retrieved_timestamp": "1762652580.5489888", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/solarmer3", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/solarmer3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3741428299135183 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5265990319952963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0581570996978852 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2911073825503356 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44013541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.332280585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json b/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json new file mode 100644 index 000000000..d0e1b6314 --- /dev/null +++ b/data/hfopenllm_v2/sumink/somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_somer/1762652580.549191", + "retrieved_timestamp": "1762652580.549192", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/somer", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/somer" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29902990731259727 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.519370328606347 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04154078549848943 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.465 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447473404255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json b/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json new file mode 100644 index 000000000..7b6533926 --- /dev/null +++ b/data/hfopenllm_v2/sumink/somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_somer2/1762652580.549396", + "retrieved_timestamp": "1762652580.549397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/somer2", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/somer2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3132433055404106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5166793474130525 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04682779456193353 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46630208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34325132978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json b/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json new file mode 100644 index 000000000..2fe017892 --- /dev/null +++ b/data/hfopenllm_v2/sumink/somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sumink_somerft/1762652580.5496058", + "retrieved_timestamp": "1762652580.5496068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sumink/somerft", + "developer": "sumink", + "inference_platform": "unknown", + "id": "sumink/somerft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14305819669587805 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3093455213252133 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2483221476510067 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40447916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.543 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json b/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json new file mode 100644 index 000000000..d28e6637f --- /dev/null +++ b/data/hfopenllm_v2/sunbaby/BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/sunbaby_BrainCog-8B-0.1-Instruct/1762652580.549814", + "retrieved_timestamp": "1762652580.549815", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "sunbaby/BrainCog-8B-0.1-Instruct", + "developer": "sunbaby", + "inference_platform": "unknown", + "id": "sunbaby/BrainCog-8B-0.1-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4253004250943053 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46182179983247446 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011744966442953 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36559375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28582114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json b/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json new file mode 100644 index 000000000..aa37fff4d --- /dev/null +++ b/data/hfopenllm_v2/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/1762652580.550269", + "retrieved_timestamp": "1762652580.5502698", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", + "developer": "swap-uniba", + "inference_platform": "unknown", + "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4815046299374548 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4935698792285044 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43873958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723404255319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json b/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json new file mode 100644 index 000000000..68b7b1699 --- /dev/null +++ b/data/hfopenllm_v2/talha2001/Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/talha2001_Beast-Soul-new/1762652580.5509062", + "retrieved_timestamp": "1762652580.5509079", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "talha2001/Beast-Soul-new", + "developer": "talha2001", + "inference_platform": "unknown", + "id": "talha2001/Beast-Soul-new" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4853510906616666 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5227143628884523 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4459270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3101728723404255 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json new file mode 100644 index 000000000..16378c159 --- /dev/null +++ b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/1762652580.5513222", + "retrieved_timestamp": "1762652580.5513222", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", + "developer": "tangledgroup", + "inference_platform": "unknown", + "id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15090182936829835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31434444692284963 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23993288590604026 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37613541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11087101063829788 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json new file mode 100644 index 000000000..bf3276165 --- /dev/null +++ b/data/hfopenllm_v2/tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/1762652580.551594", + "retrieved_timestamp": "1762652580.551595", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", + "developer": "tangledgroup", + "inference_platform": "unknown", + "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1724092075692496 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3158349391752727 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3642916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11170212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.5 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json new file mode 100644 index 000000000..fdc29a139 --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.551808", + "retrieved_timestamp": "1762652580.551809", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tanliboy/lambda-gemma-2-9b-dpo", + "developer": "tanliboy", + "inference_platform": "unknown", + "id": "tanliboy/lambda-gemma-2-9b-dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45008023156336296 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.547172399190412 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09441087613293052 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40165625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.379155585106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json new file mode 100644 index 000000000..ec9293671 --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.5520582", + "retrieved_timestamp": "1762652580.5520582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tanliboy/lambda-gemma-2-9b-dpo", + "developer": "tanliboy", + "inference_platform": "unknown", + "id": "tanliboy/lambda-gemma-2-9b-dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18292463995531855 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5487911206515993 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40562499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3804853723404255 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 9.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json new file mode 100644 index 000000000..7c9390e7e --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-14b-dpo-test/1762652580.5523891", + "retrieved_timestamp": "1762652580.5523908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tanliboy/lambda-qwen2.5-14b-dpo-test", + "developer": "tanliboy", + "inference_platform": "unknown", + "id": "tanliboy/lambda-qwen2.5-14b-dpo-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8231215397367873 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6393505282981286 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5460725075528701 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624161073825503 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42603125000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4847905585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json new file mode 100644 index 000000000..06e55aede --- /dev/null +++ b/data/hfopenllm_v2/tanliboy/lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-32b-dpo-test/1762652580.552684", + "retrieved_timestamp": "1762652580.552685", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tanliboy/lambda-qwen2.5-32b-dpo-test", + "developer": "tanliboy", + "inference_platform": "unknown", + "id": "tanliboy/lambda-qwen2.5-32b-dpo-test" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8083839767372794 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6763904009446838 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6102719033232629 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3565436241610738 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42742708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.565658244680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json b/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json new file mode 100644 index 000000000..a89d998e4 --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tannedbum_Ellaria-9B/1762652580.5529752", + "retrieved_timestamp": "1762652580.552976", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tannedbum/Ellaria-9B", + "developer": "tannedbum", + "inference_platform": "unknown", + "id": "tannedbum/Ellaria-9B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7825802204816554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5942102115140485 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20770392749244712 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33305369127516776 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4151458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42054521276595747 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 10.159 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json new file mode 100644 index 000000000..99d6d9470 --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-Maid-8B/1762652580.553287", + "retrieved_timestamp": "1762652580.553288", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tannedbum/L3-Nymeria-Maid-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "id": "tannedbum/L3-Nymeria-Maid-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7250029920610646 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5146055785516804 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37505208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37466755319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json b/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json new file mode 100644 index 000000000..1fe7444e9 --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-v2-8B/1762652580.553518", + "retrieved_timestamp": "1762652580.553519", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tannedbum/L3-Nymeria-v2-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "id": "tannedbum/L3-Nymeria-v2-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7168346653545925 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5224198261531375 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902684563758389 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.369875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37533244680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json b/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json new file mode 100644 index 000000000..53825484b --- /dev/null +++ b/data/hfopenllm_v2/tannedbum/L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tannedbum_L3-Rhaenys-8B/1762652580.553731", + "retrieved_timestamp": "1762652580.5537322", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tannedbum/L3-Rhaenys-8B", + "developer": "tannedbum", + "inference_platform": "unknown", + "id": "tannedbum/L3-Rhaenys-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7362686560548235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5299209893116719 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2978187919463087 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3724791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3799035904255319 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json b/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json new file mode 100644 index 000000000..91252ed59 --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-13B/1762652580.5542011", + "retrieved_timestamp": "1762652580.554202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "teknium/OpenHermes-13B", + "developer": "teknium", + "inference_platform": "unknown", + "id": "teknium/OpenHermes-13B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2668065178171696 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42064384521911524 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4042604166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23894614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json b/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json new file mode 100644 index 000000000..279374396 --- /dev/null +++ b/data/hfopenllm_v2/teknium/OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-7B/1762652580.5548952", + "retrieved_timestamp": "1762652580.5548952", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "teknium/OpenHermes-7B", + "developer": "teknium", + "inference_platform": "unknown", + "id": "teknium/OpenHermes-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1812513021006485 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.362033648602934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4323854166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19331781914893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json new file mode 100644 index 000000000..d44135c58 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v1/1762652580.555104", + "retrieved_timestamp": "1762652580.555105", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/falcon3-10b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/falcon3-10b-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7816560060639104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.618226655000786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27492447129909364 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3296979865771812 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43753125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4419880319148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json new file mode 100644 index 000000000..2f009851b --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v2/1762652580.555352", + "retrieved_timestamp": "1762652580.5553532", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/falcon3-10b-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/falcon3-10b-tensopolis-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7792080568447275 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.618226655000786 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26661631419939574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3271812080536913 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4296875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4424035904255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json b/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json new file mode 100644 index 000000000..b215de49c --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_lamarckvergence-14b-tensopolis-v1/1762652580.555553", + "retrieved_timestamp": "1762652580.5555542", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/lamarckvergence-14b-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/lamarckvergence-14b-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7603735865281896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6561154329558933 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5166163141993958 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36073825503355705 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44745833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5250166223404256 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json new file mode 100644 index 000000000..8d4d4e874 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v1/1762652580.557624", + "retrieved_timestamp": "1762652580.557625", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/virtuoso-lite-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/virtuoso-lite-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.806910109620252 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.610185430846048 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2545317220543807 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447986577181208 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4582395833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4434840425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json new file mode 100644 index 000000000..7e0e25500 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v2/1762652580.5578399", + "retrieved_timestamp": "1762652580.5578408", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/virtuoso-lite-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/virtuoso-lite-tensopolis-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8029384255996312 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6100187641793813 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4595416666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44398271276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json new file mode 100644 index 000000000..27e63593c --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v1/1762652580.5582058", + "retrieved_timestamp": "1762652580.558207", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/virtuoso-small-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/virtuoso-small-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7856276900845313 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6415395136436205 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527190332326284 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32802013422818793 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43263541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4968417553191489 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json new file mode 100644 index 000000000..0e73ac35f --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v2/1762652580.5584881", + "retrieved_timestamp": "1762652580.558489", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/virtuoso-small-tensopolis-v2", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/virtuoso-small-tensopolis-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8020142111818863 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6515835977499008 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38746223564954685 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43523958333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515375664893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json b/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json new file mode 100644 index 000000000..04eb8e0c8 --- /dev/null +++ b/data/hfopenllm_v2/tensopolis/virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-v2-tensopolis-v1/1762652580.558718", + "retrieved_timestamp": "1762652580.558719", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensopolis/virtuoso-small-v2-tensopolis-v1", + "developer": "tensopolis", + "inference_platform": "unknown", + "id": "tensopolis/virtuoso-small-v2-tensopolis-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8419061423689145 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6544753426578069 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.452416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45092708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5175365691489362 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json b/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json new file mode 100644 index 000000000..833fd69ab --- /dev/null +++ b/data/hfopenllm_v2/tensoropera/Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tensoropera_Fox-1-1.6B/1762652580.558935", + "retrieved_timestamp": "1762652580.558936", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tensoropera/Fox-1-1.6B", + "developer": "tensoropera", + "inference_platform": "unknown", + "id": "tensoropera/Fox-1-1.6B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27659831469390106 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3307369914593792 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35498958333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1371343085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.665 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json b/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json new file mode 100644 index 000000000..3d0d36519 --- /dev/null +++ b/data/hfopenllm_v2/theo77186/Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/1762652580.559671", + "retrieved_timestamp": "1762652580.559671", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", + "developer": "theo77186", + "inference_platform": "unknown", + "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6101477413263474 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5007976986224548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29194630872483224 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4072708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33527260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json b/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json new file mode 100644 index 000000000..9811acb30 --- /dev/null +++ b/data/hfopenllm_v2/theprint/Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_Boptruth-Agatha-7B/1762652580.559956", + "retrieved_timestamp": "1762652580.559957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/Boptruth-Agatha-7B", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/Boptruth-Agatha-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.312418826491487 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4983936045348778 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42766666666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28607047872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json new file mode 100644 index 000000000..9d5f07784 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v2/1762652580.56022", + "retrieved_timestamp": "1762652580.560221", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/CleverBoi-7B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/CleverBoi-7B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21699756645700075 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45317253321634526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46953125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27086103723404253 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.736 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json b/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json new file mode 100644 index 000000000..e3e4f6359 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v3/1762652580.560437", + "retrieved_timestamp": "1762652580.560438", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/CleverBoi-7B-v3", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/CleverBoi-7B-v3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23823011830831084 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4414430902840938 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26593959731543626 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4071770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28681848404255317 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 7.736 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json new file mode 100644 index 000000000..371434d40 --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-Instruct/1762652580.5606558", + "retrieved_timestamp": "1762652580.5606568", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/CleverBoi-Llama-3.1-8B-Instruct", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16816269719898758 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4559618469185147 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40143750000000006 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30751329787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 16.061 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json b/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json new file mode 100644 index 000000000..f81479b7f --- /dev/null +++ b/data/hfopenllm_v2/theprint/CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Nemo-12B-v2/1762652580.561142", + "retrieved_timestamp": "1762652580.561143", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/CleverBoi-Nemo-12B-v2", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/CleverBoi-Nemo-12B-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2045827293802666 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241085887165254 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4186770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3228058510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.933 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json b/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json new file mode 100644 index 000000000..f37bcfb8a --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-7B/1762652580.562494", + "retrieved_timestamp": "1762652580.562496", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-7B", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/ReWiz-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40479261692309737 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4564215411912313 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04078549848942598 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46115625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2670378989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 7.736 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json b/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json new file mode 100644 index 000000000..eb5868635 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Nemo-12B-Instruct/1762652580.563264", + "retrieved_timestamp": "1762652580.563264", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-Nemo-12B-Instruct", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/ReWiz-Nemo-12B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10623811486854878 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5092407647626753 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3238255033557047 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4095625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33394281914893614 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 12.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json b/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json new file mode 100644 index 000000000..7b88710d6 --- /dev/null +++ b/data/hfopenllm_v2/theprint/ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Worldbuilder-7B/1762652580.563769", + "retrieved_timestamp": "1762652580.56377", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/ReWiz-Worldbuilder-7B", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/ReWiz-Worldbuilder-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25101951710350756 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46361558385510165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03700906344410876 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45725 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.297124335106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.248 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json b/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json new file mode 100644 index 000000000..f170f1f91 --- /dev/null +++ b/data/hfopenllm_v2/theprint/RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_RuDolph-Hermes-7B/1762652580.564037", + "retrieved_timestamp": "1762652580.5640378", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/RuDolph-Hermes-7B", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/RuDolph-Hermes-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3604292167005767 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5052928613425586 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0513595166163142 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31208053691275167 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4226145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30726396276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json b/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json new file mode 100644 index 000000000..0272256f2 --- /dev/null +++ b/data/hfopenllm_v2/theprint/WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/theprint_WorldBuilder-12B/1762652580.564255", + "retrieved_timestamp": "1762652580.564256", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "theprint/WorldBuilder-12B", + "developer": "theprint", + "inference_platform": "unknown", + "id": "theprint/WorldBuilder-12B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13743755457741016 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5010100641541125 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4066458333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31923204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "?", + "params_billions": 13.933 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json b/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json new file mode 100644 index 000000000..5a70acb41 --- /dev/null +++ b/data/hfopenllm_v2/thinkcoder/llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thinkcoder_llama3-8b-instruct-lora-8-sft/1762652580.564969", + "retrieved_timestamp": "1762652580.56497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thinkcoder/llama3-8b-instruct-lora-8-sft", + "developer": "thinkcoder", + "inference_platform": "unknown", + "id": "thinkcoder/llama3-8b-instruct-lora-8-sft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6480416406246536 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4865011845587858 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10196374622356495 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32345833333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34757313829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json b/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json new file mode 100644 index 000000000..02c234e98 --- /dev/null +++ b/data/hfopenllm_v2/thirdeyeai/elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/thirdeyeai_elevate360m/1762652580.565248", + "retrieved_timestamp": "1762652580.565249", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "thirdeyeai/elevate360m", + "developer": "thirdeyeai", + "inference_platform": "unknown", + "id": "thirdeyeai/elevate360m" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04448862351892978 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2962583602962783 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2407718120805369 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34621875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1077127659574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json new file mode 100644 index 000000000..dfc311a42 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Base/1762652580.566659", + "retrieved_timestamp": "1762652580.566659", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-10B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-10B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3647754624396601 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.595004253437141 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24924471299093656 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34563758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43979166666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4240359042553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json new file mode 100644 index 000000000..047aa5bfc --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Instruct/1762652580.566902", + "retrieved_timestamp": "1762652580.566903", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-10B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-10B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7816560060639104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6170469398052084 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764350453172205 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3288590604026846 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43232291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44290226063829785 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.306 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json new file mode 100644 index 000000000..f801d95be --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Base/1762652580.567122", + "retrieved_timestamp": "1762652580.567122", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-1B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-1B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24280132271262472 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3571153918015637 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03323262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41473958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16082114361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.669 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json new file mode 100644 index 000000000..62f223388 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Instruct/1762652580.567335", + "retrieved_timestamp": "1762652580.567335", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-1B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5556678501930433 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3744535691366672 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0634441087613293 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4188958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18384308510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.669 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json new file mode 100644 index 000000000..355f6c2b2 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Base/1762652580.5675461", + "retrieved_timestamp": "1762652580.5675468", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-3B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-3B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2764985793250797 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4421367825874385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11782477341389729 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29697986577181207 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3749895833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2878989361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json new file mode 100644 index 000000000..1dc16793d --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Instruct/1762652580.567748", + "retrieved_timestamp": "1762652580.567749", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-3B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6976755010040027 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4754430332167569 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28859060402684567 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41359375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.300531914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.228 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json new file mode 100644 index 000000000..fb8eeded3 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Base/1762652580.56796", + "retrieved_timestamp": "1762652580.567961", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-7B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-7B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34159474638403875 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5098880466426711 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19410876132930513 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3464765100671141 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47020833333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3910405585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json new file mode 100644 index 000000000..dc4044c6e --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Instruct/1762652580.568164", + "retrieved_timestamp": "1762652580.568164", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-7B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7612479332615238 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.563244278519333 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4086102719033233 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48267708333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4087433510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.456 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json new file mode 100644 index 000000000..51eb376fe --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Base/1762652580.568367", + "retrieved_timestamp": "1762652580.5683682", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-Mamba-7B-Base", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-Mamba-7B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28911288713945665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4699280188827039 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19410876132930513 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3431458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30377327127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.273 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json new file mode 100644 index 000000000..089c06dad --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Instruct/1762652580.5685718", + "retrieved_timestamp": "1762652580.5685718", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/Falcon3-Mamba-7B-Instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/Falcon3-Mamba-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7165099713205406 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4678957688410694 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30060422960725075 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38686458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3369348404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.273 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json b/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json new file mode 100644 index 000000000..63bee7ed2 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-11B/1762652580.568774", + "retrieved_timestamp": "1762652580.568774", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-11B", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-11B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261324397044287 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43916370355493844 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.027945619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2709731543624161 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39864583333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23894614361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 11.103 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json b/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json new file mode 100644 index 000000000..bd4eed595 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b-instruct/1762652580.569173", + "retrieved_timestamp": "1762652580.569173", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-40b-instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-40b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24544874266945038 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40538675151591974 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.019637462235649546 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37622916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2261469414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 40.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json b/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json new file mode 100644 index 000000000..27223a9df --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b/1762652580.568969", + "retrieved_timestamp": "1762652580.56897", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-40b", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-40b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24964538535530173 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4018532495595801 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01812688821752266 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27348993288590606 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36314583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25049867021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 40.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json b/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json new file mode 100644 index 000000000..d618f253e --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b-instruct/1762652580.5696268", + "retrieved_timestamp": "1762652580.5696268", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-7b-instruct", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-7b-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19688869976107837 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32034221512355765 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.012084592145015106 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3633645833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1155252659574468 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json b/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json new file mode 100644 index 000000000..e53ba5930 --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b/1762652580.5693781", + "retrieved_timestamp": "1762652580.569379", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-7b", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.182051401392749 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32852446117322215 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24496644295302014 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37784375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253324468085106 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json b/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json new file mode 100644 index 000000000..6ebba9a1c --- /dev/null +++ b/data/hfopenllm_v2/tiiuae/falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tiiuae_falcon-mamba-7b/1762652580.569833", + "retrieved_timestamp": "1762652580.569834", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tiiuae/falcon-mamba-7b", + "developer": "tiiuae", + "inference_platform": "unknown", + "id": "tiiuae/falcon-mamba-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3335760227307987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4284854988604366 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0445619335347432 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3104026845637584 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42103124999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23021941489361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "FalconMambaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json new file mode 100644 index 000000000..f04633a4c --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.3/1762652580.570036", + "retrieved_timestamp": "1762652580.570036", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/BiBo-v0.3", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/BiBo-v0.3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5183989592060179 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4641611514377814 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3949895833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29945146276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json b/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json new file mode 100644 index 000000000..384946d8a --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.7/1762652580.570291", + "retrieved_timestamp": "1762652580.570291", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/BiBo-v0.7", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/BiBo-v0.7" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3738181358794665 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43108167584271034 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40441666666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2650432180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json new file mode 100644 index 000000000..eda1262c9 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-bgem3/1762652580.570496", + "retrieved_timestamp": "1762652580.570497", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/ShawtyIsBad-bgem3", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/ShawtyIsBad-bgem3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2608113139802391 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38529707856388956 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04833836858006042 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36946875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25831117021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json new file mode 100644 index 000000000..d5583fd7b --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-e5-large/1762652580.5709078", + "retrieved_timestamp": "1762652580.570912", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/ShawtyIsBad-e5-large", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/ShawtyIsBad-e5-large" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24682287441765627 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873483842947396 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.045317220543806644 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37204166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25689827127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json new file mode 100644 index 000000000..301ea3925 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-ib/1762652580.571291", + "retrieved_timestamp": "1762652580.571292", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/ShawtyIsBad-ib", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/ShawtyIsBad-ib" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2565149359255664 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3880457874839807 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3641041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.258061835106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json new file mode 100644 index 000000000..01b2ec023 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic-moe/1762652580.571543", + "retrieved_timestamp": "1762652580.5715442", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/ShawtyIsBad-nomic-moe", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/ShawtyIsBad-nomic-moe" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2607614462958284 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3878019225656597 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3070469798657718 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37470833333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2572307180851064 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json new file mode 100644 index 000000000..78de145c9 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic1.5/1762652580.571785", + "retrieved_timestamp": "1762652580.571787", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/ShawtyIsBad-nomic1.5", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/ShawtyIsBad-nomic1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2543916807404354 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3873599493472512 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.311241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36283333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25673204787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.436 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json new file mode 100644 index 000000000..831b4ab9b --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-base/1762652580.5720189", + "retrieved_timestamp": "1762652580.57202", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-base", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24469961923252526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4314363391906919 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07779456193353475 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43427083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2816655585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json new file mode 100644 index 000000000..8db2748bf --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bge-m3/1762652580.572246", + "retrieved_timestamp": "1762652580.572247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-bge-m3", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-bge-m3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24502431326657714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43509173985964184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4383020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28191489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json new file mode 100644 index 000000000..5ad753777 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bgem3/1762652580.572469", + "retrieved_timestamp": "1762652580.57247", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-bgem3", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-bgem3" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24502431326657714 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43509173985964184 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07628398791540786 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4383020833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28191489361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json new file mode 100644 index 000000000..7bd25328e --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-ib/1762652580.572692", + "retrieved_timestamp": "1762652580.572693", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-ib", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-ib" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24774708883540117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4343622024096135 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07401812688821752 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42896874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2824135638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json new file mode 100644 index 000000000..f9d43f336 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic-moe/1762652580.57291", + "retrieved_timestamp": "1762652580.572911", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-nomic-moe", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-nomic-moe" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2474223948013493 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43341835214223373 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29278523489932884 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43163541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28366023936170215 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json new file mode 100644 index 000000000..b4427cd31 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5-fp32/1762652580.573416", + "retrieved_timestamp": "1762652580.573416", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-nomic1.5-fp32", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-nomic1.5-fp32" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24622335403396323 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43705348265770266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28407579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json new file mode 100644 index 000000000..558f89303 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5/1762652580.5731819", + "retrieved_timestamp": "1762652580.5731819", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/SigmaBoi-nomic1.5", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/SigmaBoi-nomic1.5" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24469961923252526 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43705348265770266 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4316041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28407579787234044 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.943 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json b/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json new file mode 100644 index 000000000..f0ee696d9 --- /dev/null +++ b/data/hfopenllm_v2/tinycompany/Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tinycompany_Tamed-Shawty/1762652580.573629", + "retrieved_timestamp": "1762652580.573629", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tinycompany/Tamed-Shawty", + "developer": "tinycompany", + "inference_platform": "unknown", + "id": "tinycompany/Tamed-Shawty" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38308576798450333 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3837059588999942 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2625838926174497 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35009375000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2601396276595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.562 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json b/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json new file mode 100644 index 000000000..e1a790ca3 --- /dev/null +++ b/data/hfopenllm_v2/tklohj/WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tklohj_WindyFloLLM/1762652580.573854", + "retrieved_timestamp": "1762652580.573855", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tklohj/WindyFloLLM", + "developer": "tklohj", + "inference_platform": "unknown", + "id": "tklohj/WindyFloLLM" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26685638550158025 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4636616007058791 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2751677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4253125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25814494680851063 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.016 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json b/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json new file mode 100644 index 000000000..8f472e831 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-NeoXT-Chat-Base-20B/1762652580.574344", + "retrieved_timestamp": "1762652580.5743449", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.18297561581049393 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33209702572173033 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.023413897280966767 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11452792553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 20.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json b/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json new file mode 100644 index 000000000..85f257de2 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_Llama-2-7B-32K-Instruct/1762652580.574983", + "retrieved_timestamp": "1762652580.5749838", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/Llama-2-7B-32K-Instruct", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/Llama-2-7B-32K-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2130003945087922 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34434724239927544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2516778523489933 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40559375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17810837765957446 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json new file mode 100644 index 000000000..633c68ef4 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Base/1762652580.5751948", + "retrieved_timestamp": "1762652580.5751958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-7B-Base", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-7B-Base" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20822971936683554 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31948898765013445 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.015861027190332326 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36199999999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1196808510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json new file mode 100644 index 000000000..2e0055686 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Chat/1762652580.57541", + "retrieved_timestamp": "1762652580.5754108", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-7B-Chat", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-7B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1557977278066641 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3175449328457368 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.006797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2525167785234899 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3447604166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11211768617021277 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json new file mode 100644 index 000000000..720cd312b --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Instruct/1762652580.57568", + "retrieved_timestamp": "1762652580.575681", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-7B-Instruct", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-7B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2055069437980115 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.337743947089799 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25083892617449666 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3685104166666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1272440159574468 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 7.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json new file mode 100644 index 000000000..826d547fa --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Base-3B-v1/1762652580.575899", + "retrieved_timestamp": "1762652580.5758998", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22936253584932426 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3060403878987615 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.014350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24328859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37387499999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11112034574468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json new file mode 100644 index 000000000..e238a27d5 --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Chat-3B-v1/1762652580.5763452", + "retrieved_timestamp": "1762652580.5763478", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16521496296493304 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32166937119202416 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24412751677852348 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3684479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11269946808510638 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json new file mode 100644 index 000000000..15d8d477b --- /dev/null +++ b/data/hfopenllm_v2/togethercomputer/RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/1762652580.576687", + "retrieved_timestamp": "1762652580.576688", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "developer": "togethercomputer", + "inference_platform": "unknown", + "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2124263620526869 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3146017752057237 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24748322147651006 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38860416666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11095412234042554 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GPTNeoXForCausalLM", + "params_billions": 3.0 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json b/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json new file mode 100644 index 000000000..83ba80162 --- /dev/null +++ b/data/hfopenllm_v2/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/1762652580.5769222", + "retrieved_timestamp": "1762652580.576923", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", + "developer": "tokyotech-llm", + "inference_platform": "unknown", + "id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5507719517546776 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5009389976232003 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43569791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087599734042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json b/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json new file mode 100644 index 000000000..049f72d3f --- /dev/null +++ b/data/hfopenllm_v2/tomasmcm/sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tomasmcm_sky-t1-coder-32b-flash/1762652580.577295", + "retrieved_timestamp": "1762652580.5772958", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tomasmcm/sky-t1-coder-32b-flash", + "developer": "tomasmcm", + "inference_platform": "unknown", + "id": "tomasmcm/sky-t1-coder-32b-flash" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7780090160773414 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6822440044314982 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5422960725075529 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36828859060402686 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4232708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5782081117021277 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json b/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json new file mode 100644 index 000000000..0de6b10ea --- /dev/null +++ b/data/hfopenllm_v2/tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1762652580.577852", + "retrieved_timestamp": "1762652580.577852", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", + "developer": "tugstugi", + "inference_platform": "unknown", + "id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6017300761978217 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5101062293388118 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3814199395770393 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2684563758389262 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3794270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4080784574468085 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json new file mode 100644 index 000000000..18d8709df --- /dev/null +++ b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct-no-system-message/1762652580.578731", + "retrieved_timestamp": "1762652580.578733", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/Llama-3.2-1B-Instruct-no-system-message", + "developer": "unsloth", + "inference_platform": "unknown", + "id": "unsloth/Llama-3.2-1B-Instruct-no-system-message" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5649853499824908 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3543744783345775 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2726510067114094 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3340625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1668882978723404 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json new file mode 100644 index 000000000..c6255d68a --- /dev/null +++ b/data/hfopenllm_v2/unsloth/Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct/1762652580.578335", + "retrieved_timestamp": "1762652580.578335", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/Llama-3.2-1B-Instruct", + "developer": "unsloth", + "inference_platform": "unknown", + "id": "unsloth/Llama-3.2-1B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5809973093613834 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34847036874553655 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0823262839879154 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3196145833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17420212765957446 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.236 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json b/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json new file mode 100644 index 000000000..109184ec3 --- /dev/null +++ b/data/hfopenllm_v2/unsloth/Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/unsloth_Phi-3-mini-4k-instruct/1762652580.579097", + "retrieved_timestamp": "1762652580.5790982", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "unsloth/Phi-3-mini-4k-instruct", + "developer": "unsloth", + "inference_platform": "unknown", + "id": "unsloth/Phi-3-mini-4k-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.544027624480822 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5500239467441027 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16389728096676737 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32298657718120805 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42841666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4030917553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json new file mode 100644 index 000000000..2a15ca696 --- /dev/null +++ b/data/hfopenllm_v2/upstage/SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-Instruct-v1.0/1762652580.580213", + "retrieved_timestamp": "1762652580.58022", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "upstage/SOLAR-10.7B-Instruct-v1.0", + "developer": "upstage", + "inference_platform": "unknown", + "id": "upstage/SOLAR-10.7B-Instruct-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4736609972650345 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5162494941446991 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3899375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31382978723404253 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json b/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json new file mode 100644 index 000000000..89a50d0c9 --- /dev/null +++ b/data/hfopenllm_v2/upstage/SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-v1.0/1762652580.5805068", + "retrieved_timestamp": "1762652580.580508", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "upstage/SOLAR-10.7B-v1.0", + "developer": "upstage", + "inference_platform": "unknown", + "id": "upstage/SOLAR-10.7B-v1.0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24212644671693329 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5093873084711799 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.026435045317220542 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28104026845637586 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43715624999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3400099734042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json b/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json new file mode 100644 index 000000000..cfccb749e --- /dev/null +++ b/data/hfopenllm_v2/upstage/solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/upstage_solar-pro-preview-instruct/1762652580.5807302", + "retrieved_timestamp": "1762652580.580731", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "upstage/solar-pro-preview-instruct", + "developer": "upstage", + "inference_platform": "unknown", + "id": "upstage/solar-pro-preview-instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8415814483348626 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6816843051379534 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22054380664652568 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37080536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44165625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.52734375 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "SolarForCausalLM", + "params_billions": 22.14 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json b/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json new file mode 100644 index 000000000..b69fc790f --- /dev/null +++ b/data/hfopenllm_v2/utkmst/chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/utkmst_chimera-beta-test2-lora-merged/1762652580.581129", + "retrieved_timestamp": "1762652580.581131", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "utkmst/chimera-beta-test2-lora-merged", + "developer": "utkmst", + "inference_platform": "unknown", + "id": "utkmst/chimera-beta-test2-lora-merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6054269338688014 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47957156724192185 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09516616314199396 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3036912751677852 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4117916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2992021276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json b/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json new file mode 100644 index 000000000..85b5877b2 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-coder-ds-6.7b/1762652580.582827", + "retrieved_timestamp": "1762652580.582828", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-coder-ds-6.7b", + "developer": "uukuguy", + "inference_platform": "unknown", + "id": "uukuguy/speechless-coder-ds-6.7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25046986440422525 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4036373344669979 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.021148036253776436 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3819375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.171875 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 6.7 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json b/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json new file mode 100644 index 000000000..b390bcb75 --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-instruct-mistral-7b-v0.2/1762652580.5831082", + "retrieved_timestamp": "1762652580.5831091", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-instruct-mistral-7b-v0.2", + "developer": "uukuguy", + "inference_platform": "unknown", + "id": "uukuguy/speechless-instruct-mistral-7b-v0.2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3261324397044287 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4606667950681749 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28187919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901770833333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2902260638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json b/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json new file mode 100644 index 000000000..bf31d4cff --- /dev/null +++ b/data/hfopenllm_v2/uukuguy/speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/uukuguy_speechless-zephyr-code-functionary-7b/1762652580.583915", + "retrieved_timestamp": "1762652580.583916", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "uukuguy/speechless-zephyr-code-functionary-7b", + "developer": "uukuguy", + "inference_platform": "unknown", + "id": "uukuguy/speechless-zephyr-code-functionary-7b" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2695791610704043 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46642753957194555 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04229607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30033557046979864 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4267708333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3094248670212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json b/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json new file mode 100644 index 000000000..805a3a1e4 --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_L3-8B-Stheno-v3.2-abliterated/1762652580.584157", + "retrieved_timestamp": "1762652580.584158", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/L3-8B-Stheno-v3.2-abliterated", + "developer": "v000000", + "inference_platform": "unknown", + "id": "v000000/L3-8B-Stheno-v3.2-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6717720093795574 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141439214918061 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06948640483383686 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30956375838926176 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36196875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3603723404255319 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json b/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json new file mode 100644 index 000000000..10eab44a6 --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_L3.1-Niitorm-8B-DPO-t0.0001/1762652580.5844421", + "retrieved_timestamp": "1762652580.5844429", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", + "developer": "v000000", + "inference_platform": "unknown", + "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7688666072687137 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5134234526726582 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1623867069486405 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3879791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38663563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json b/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json new file mode 100644 index 000000000..47de662a9 --- /dev/null +++ b/data/hfopenllm_v2/v000000/L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_L3.1-Storniitova-8B/1762652580.584696", + "retrieved_timestamp": "1762652580.584697", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/L3.1-Storniitova-8B", + "developer": "v000000", + "inference_platform": "unknown", + "id": "v000000/L3.1-Storniitova-8B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7816560060639104 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5151452004311876 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14652567975830816 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4028958333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37757646276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json new file mode 100644 index 000000000..3a5dfaea1 --- /dev/null +++ b/data/hfopenllm_v2/v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1762652580.585153", + "retrieved_timestamp": "1762652580.585153", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", + "developer": "v000000", + "inference_platform": "unknown", + "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8197493760998595 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.639010174859259 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5324773413897281 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3313758389261745 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4113645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4923537234042553 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json b/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json new file mode 100644 index 000000000..55d56e91f --- /dev/null +++ b/data/hfopenllm_v2/vhab10/Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/1762652580.585581", + "retrieved_timestamp": "1762652580.585582", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", + "developer": "vhab10", + "inference_platform": "unknown", + "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.290711977552893 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5057443268070797 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.12009063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2961409395973154 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40106250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3621176861702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json b/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json new file mode 100644 index 000000000..289c1b35d --- /dev/null +++ b/data/hfopenllm_v2/vhab10/Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.2-Instruct-3B-TIES/1762652580.585841", + "retrieved_timestamp": "1762652580.585842", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vhab10/Llama-3.2-Instruct-3B-TIES", + "developer": "vhab10", + "inference_platform": "unknown", + "id": "vhab10/Llama-3.2-Instruct-3B-TIES" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4727367828472896 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43323649966514094 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34965625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2915558510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.848 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json new file mode 100644 index 000000000..34a84e3d8 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B-truthy/1762652580.586528", + "retrieved_timestamp": "1762652580.586528", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/CarbonBeagle-11B-truthy", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/CarbonBeagle-11B-truthy" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5212214701436633 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5348420085288232 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04909365558912387 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29949664429530204 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37396874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.335688164893617 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json new file mode 100644 index 000000000..90b820a9d --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B/1762652580.5862951", + "retrieved_timestamp": "1762652580.5862951", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/CarbonBeagle-11B", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/CarbonBeagle-11B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5415298075772285 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5293652486530874 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.061933534743202415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40203125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32762632978723405 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json b/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json new file mode 100644 index 000000000..88dc727de --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Llama-3.1-8B-Instruct/1762652580.586963", + "retrieved_timestamp": "1762652580.586964", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Configurable-Llama-3.1-8B-Instruct", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8312399987588488 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5044756225072481 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1729607250755287 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3845416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3592087765957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json b/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json new file mode 100644 index 000000000..790070bfc --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Yi-1.5-9B-Chat/1762652580.587164", + "retrieved_timestamp": "1762652580.5871649", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Configurable-Yi-1.5-9B-Chat", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/Configurable-Yi-1.5-9B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43234506664538974 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5452196737175008 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20468277945619334 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34312080536912754 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42711458333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4015126329787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.829 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json b/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json new file mode 100644 index 000000000..590a3c990 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableBeagle-11B/1762652580.587369", + "retrieved_timestamp": "1762652580.58737", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/ConfigurableBeagle-11B", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/ConfigurableBeagle-11B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5834452585805663 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5286592318626696 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04305135951661632 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39530208333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33743351063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json b/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json new file mode 100644 index 000000000..9bdb32b09 --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableHermes-7B/1762652580.5875661", + "retrieved_timestamp": "1762652580.587567", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/ConfigurableHermes-7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/ConfigurableHermes-7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5410798902467675 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4572969627830424 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04758308157099698 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27684563758389263 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4056875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3025265957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json b/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json new file mode 100644 index 000000000..5218ff68a --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableSOLAR-10.7B/1762652580.587757", + "retrieved_timestamp": "1762652580.587758", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/ConfigurableSOLAR-10.7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/ConfigurableSOLAR-10.7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5099558061499045 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48668100977360457 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06646525679758308 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38047916666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31732047872340424 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json b/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json new file mode 100644 index 000000000..745cdc5eb --- /dev/null +++ b/data/hfopenllm_v2/vicgalle/Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mixtral-Prometheus-8x7B/1762652580.588394", + "retrieved_timestamp": "1762652580.588395", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vicgalle/Merge-Mixtral-Prometheus-8x7B", + "developer": "vicgalle", + "inference_platform": "unknown", + "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5744025851407598 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5351498071096573 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09290030211480363 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3087248322147651 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40975 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3683510638297872 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json b/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json new file mode 100644 index 000000000..7d21f0512 --- /dev/null +++ b/data/hfopenllm_v2/vihangd/smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vihangd_smart-dan-sft-v0.1/1762652580.589078", + "retrieved_timestamp": "1762652580.5890791", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vihangd/smart-dan-sft-v0.1", + "developer": "vihangd", + "inference_platform": "unknown", + "id": "vihangd/smart-dan-sft-v0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15764615664215392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30617689187138886 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.009818731117824773 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2550335570469799 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35018750000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11419547872340426 + } + } + ], + "additional_details": { + "precision": "4bit", + "architecture": "LlamaForCausalLM", + "params_billions": 0.379 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json b/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json new file mode 100644 index 000000000..a7837edc0 --- /dev/null +++ b/data/hfopenllm_v2/voidful/smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/voidful_smol-360m-ft/1762652580.589319", + "retrieved_timestamp": "1762652580.58932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "voidful/smol-360m-ft", + "developer": "voidful", + "inference_platform": "unknown", + "id": "voidful/smol-360m-ft" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2013103011121602 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3011946898842932 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.008308157099697885 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24580536912751677 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713645833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10871010638297872 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json b/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json new file mode 100644 index 000000000..73f58fc34 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_MobileLLM-125M-HF/1762652580.589566", + "retrieved_timestamp": "1762652580.589567", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/MobileLLM-125M-HF", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/MobileLLM-125M-HF" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.21072753627042912 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30272988561565645 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.00906344410876133 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37818749999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1163563829787234 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.125 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json b/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json new file mode 100644 index 000000000..84502c8b1 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/1762652580.589802", + "retrieved_timestamp": "1762652580.589803", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5787488308798432 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5740684031598843 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.13821752265861026 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33053691275167785 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3923541666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3885472074468085 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.821 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json b/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json new file mode 100644 index 000000000..e704adc16 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/1762652580.5900009", + "retrieved_timestamp": "1762652580.5900018", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14158432957885078 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29747555432824196 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0075528700906344415 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25419463087248323 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40413541666666664 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11386303191489362 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "?", + "params_billions": 4.132 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json b/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json new file mode 100644 index 000000000..5e2aac400 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-1.7B-Merged/1762652580.5904331", + "retrieved_timestamp": "1762652580.590434", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/SmolLM2-1.7B-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/SmolLM2-1.7B-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36979658417443495 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3586553457965105 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06268882175226587 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27936241610738255 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34079166666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2047872340425532 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 1.711 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json b/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json new file mode 100644 index 000000000..7e7da2356 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-135M-Merged/1762652580.590627", + "retrieved_timestamp": "1762652580.590627", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/SmolLM2-135M-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/SmolLM2-135M-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24829674153468353 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3099931265410582 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.011329305135951661 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23825503355704697 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36618749999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11120345744680851 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.135 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json b/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json new file mode 100644 index 000000000..a23754a92 --- /dev/null +++ b/data/hfopenllm_v2/vonjack/SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-360M-Merged/1762652580.590822", + "retrieved_timestamp": "1762652580.590823", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "vonjack/SmolLM2-360M-Merged", + "developer": "vonjack", + "inference_platform": "unknown", + "id": "vonjack/SmolLM2-360M-Merged" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.32058715319795916 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31548533684955926 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.017371601208459216 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2558724832214765 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3527291666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10979055851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 0.362 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json b/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json new file mode 100644 index 000000000..1cba78f1f --- /dev/null +++ b/data/hfopenllm_v2/w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/1762652580.5912771", + "retrieved_timestamp": "1762652580.591278", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", + "developer": "w4r10ck", + "inference_platform": "unknown", + "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38840609582574237 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5301525050503222 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4639479166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3343583776595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 10.732 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json new file mode 100644 index 000000000..343f68a7a --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp/1762652580.591778", + "retrieved_timestamp": "1762652580.591778", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wanlige/li-14b-v0.4-slerp", + "developer": "wanlige", + "inference_platform": "unknown", + "id": "wanlige/li-14b-v0.4-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4605967721201967 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6587180444175935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41918429003021146 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4001677852348993 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47675 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5372340425531915 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json new file mode 100644 index 000000000..6b55f5562 --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp0.1/1762652580.5919738", + "retrieved_timestamp": "1762652580.591975", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wanlige/li-14b-v0.4-slerp0.1", + "developer": "wanlige", + "inference_platform": "unknown", + "id": "wanlige/li-14b-v0.4-slerp0.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7922722819895655 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6571741435852609 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5332326283987915 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35906040268456374 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4206666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5294215425531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json b/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json new file mode 100644 index 000000000..2cdd6fb8e --- /dev/null +++ b/data/hfopenllm_v2/wanlige/li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4/1762652580.591545", + "retrieved_timestamp": "1762652580.591546", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wanlige/li-14b-v0.4", + "developer": "wanlige", + "inference_platform": "unknown", + "id": "wanlige/li-14b-v0.4" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.813279875175645 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6544457993364277 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5574018126888217 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3389261744966443 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.446 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5167054521276596 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.77 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json b/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json new file mode 100644 index 000000000..37ee114ad --- /dev/null +++ b/data/hfopenllm_v2/wannaphong/KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/wannaphong_KhanomTanLLM-Instruct/1762652580.59218", + "retrieved_timestamp": "1762652580.59218", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "wannaphong/KhanomTanLLM-Instruct", + "developer": "wannaphong", + "inference_platform": "unknown", + "id": "wannaphong/KhanomTanLLM-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.16211762567764643 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30931233392513263 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.013595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634228187919463 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37006249999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1118683510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.447 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json b/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json new file mode 100644 index 000000000..06086db5d --- /dev/null +++ b/data/hfopenllm_v2/waqasali1707/Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/waqasali1707_Beast-Soul-new/1762652580.592428", + "retrieved_timestamp": "1762652580.592428", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "waqasali1707/Beast-Soul-new", + "developer": "waqasali1707", + "inference_platform": "unknown", + "id": "waqasali1707/Beast-Soul-new" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5029865202108184 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.522494907014536 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0702416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4485625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3107546542553192 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json b/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json new file mode 100644 index 000000000..90a4be7c5 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3B-500/1762652580.593058", + "retrieved_timestamp": "1762652580.593059", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "weathermanj/Menda-3B-500", + "developer": "weathermanj", + "inference_platform": "unknown", + "id": "weathermanj/Menda-3B-500" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6353021095138676 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4766312519942703 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3723564954682779 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39679166666666665 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3474900265957447 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json b/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json new file mode 100644 index 000000000..1f4e2653e --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-750/1762652580.593308", + "retrieved_timestamp": "1762652580.593309", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "weathermanj/Menda-3b-750", + "developer": "weathermanj", + "inference_platform": "unknown", + "id": "weathermanj/Menda-3b-750" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6335035483627884 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4736825577251204 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39418749999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3505651595744681 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json new file mode 100644 index 000000000..42ef00219 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-100/1762652580.5935092", + "retrieved_timestamp": "1762652580.59351", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "weathermanj/Menda-3b-Optim-100", + "developer": "weathermanj", + "inference_platform": "unknown", + "id": "weathermanj/Menda-3b-Optim-100" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6398234462337709 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47348022177793836 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3716012084592145 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39930208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3460771276595745 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json new file mode 100644 index 000000000..70ee26165 --- /dev/null +++ b/data/hfopenllm_v2/weathermanj/Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-200/1762652580.5937102", + "retrieved_timestamp": "1762652580.5937111", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "weathermanj/Menda-3b-Optim-200", + "developer": "weathermanj", + "inference_platform": "unknown", + "id": "weathermanj/Menda-3b-Optim-200" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6374752323834094 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.47460604908284837 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3731117824773414 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2827181208053691 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40330208333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3484042553191489 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json b/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json new file mode 100644 index 000000000..ab7ac095c --- /dev/null +++ b/data/hfopenllm_v2/win10/ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_ArliAI-RPMax-v1.3-merge-13.3B/1762652580.593927", + "retrieved_timestamp": "1762652580.5939279", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/ArliAI-RPMax-v1.3-merge-13.3B", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3038260703821416 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4581388671914119 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4325104166666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31998005319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.265 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json b/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json new file mode 100644 index 000000000..17f28250d --- /dev/null +++ b/data/hfopenllm_v2/win10/Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_Breeze-13B-32k-Instruct-v1_0/1762652580.5941818", + "retrieved_timestamp": "1762652580.594183", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/Breeze-13B-32k-Instruct-v1_0", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/Breeze-13B-32k-Instruct-v1_0" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35843118481185476 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.46112304746712934 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.01283987915407855 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26426174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42019791666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2568151595744681 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MistralForCausalLM", + "params_billions": 12.726 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json b/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json new file mode 100644 index 000000000..db209ccd1 --- /dev/null +++ b/data/hfopenllm_v2/win10/Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_Llama-3.2-3B-Instruct-24-9-29/1762652580.594629", + "retrieved_timestamp": "1762652580.59463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/Llama-3.2-3B-Instruct-24-9-29", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/Llama-3.2-3B-Instruct-24-9-29" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7332211864519476 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4614234982167829 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.17069486404833836 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27432885906040266 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35552083333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3228058510638298 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json b/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json new file mode 100644 index 000000000..40e21499f --- /dev/null +++ b/data/hfopenllm_v2/win10/Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_Qwen2.5-2B-Instruct/1762652580.5952861", + "retrieved_timestamp": "1762652580.595287", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/Qwen2.5-2B-Instruct", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/Qwen2.5-2B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.22728914834860392 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3705905854806977 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.022658610271903322 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2676174496644295 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43784375000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.19340093085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 2.9 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json b/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json new file mode 100644 index 000000000..b09167c33 --- /dev/null +++ b/data/hfopenllm_v2/win10/llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_llama3-13.45b-Instruct/1762652580.595499", + "retrieved_timestamp": "1762652580.5955", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/llama3-13.45b-Instruct", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/llama3-13.45b-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4144348107465968 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.486541523346714 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.02416918429003021 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38476041666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3345246010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 13.265 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json b/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json new file mode 100644 index 000000000..cee77305f --- /dev/null +++ b/data/hfopenllm_v2/win10/miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/win10_miscii-14b-1M-0128/1762652580.5956988", + "retrieved_timestamp": "1762652580.5957", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "win10/miscii-14b-1M-0128", + "developer": "win10", + "inference_platform": "unknown", + "id": "win10/miscii-14b-1M-0128" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4180818007331658 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5741994518517665 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4773413897280967 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3825503355704698 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5431041666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.44913563829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 14.766 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json b/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json new file mode 100644 index 000000000..0f6f4f189 --- /dev/null +++ b/data/hfopenllm_v2/xMaulana/FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xMaulana_FinMatcha-3B-Instruct/1762652580.5969138", + "retrieved_timestamp": "1762652580.5969138", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xMaulana/FinMatcha-3B-Instruct", + "developer": "xMaulana", + "inference_platform": "unknown", + "id": "xMaulana/FinMatcha-3B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7548283000217202 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.453555265188897 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.14350453172205438 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26929530201342283 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36333333333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3181515957446808 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 3.213 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json b/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json new file mode 100644 index 000000000..f897275d8 --- /dev/null +++ b/data/hfopenllm_v2/xinchen9/llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xinchen9_llama3-b8-ft-dis/1762652580.598142", + "retrieved_timestamp": "1762652580.598142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xinchen9/llama3-b8-ft-dis", + "developer": "xinchen9", + "inference_platform": "unknown", + "id": "xinchen9/llama3-b8-ft-dis" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.154598687039278 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4625789691224553 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.03927492447129909 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.31291946308724833 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.365375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3243849734042553 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json new file mode 100644 index 000000000..c2f0c0c0c --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/1762652580.598392", + "retrieved_timestamp": "1762652580.5983932", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6374752323834094 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4912273915261041 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09214501510574018 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38199999999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3686003989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json new file mode 100644 index 000000000..61b74758d --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/1762652580.598656", + "retrieved_timestamp": "1762652580.598656", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7274509412802475 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5056858683165713 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08459214501510574 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38190624999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3696808510638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json new file mode 100644 index 000000000..34e0c6ccd --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/1762652580.598878", + "retrieved_timestamp": "1762652580.5988789", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6568593553992297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49518319163897667 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35939583333333336 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37017952127659576 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json new file mode 100644 index 000000000..0fb4745ca --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/1762652580.599085", + "retrieved_timestamp": "1762652580.599086", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6620799478716473 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.500449109241973 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08610271903323263 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3805416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3599567819148936 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json new file mode 100644 index 000000000..ffb14c17b --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/1762652580.599285", + "retrieved_timestamp": "1762652580.599286", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6042278931014153 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4936062924421171 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3793333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.370844414893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json new file mode 100644 index 000000000..5b9ccfcdf --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/1762652580.599496", + "retrieved_timestamp": "1762652580.5994968", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7131876753680235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4996376240562969 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08534743202416918 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3872083333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3664394946808511 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json new file mode 100644 index 000000000..510c5668e --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/1762652580.599715", + "retrieved_timestamp": "1762652580.599715", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.594710922574325 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48992211803775065 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10725075528700906 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35809374999999993 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37042885638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json new file mode 100644 index 000000000..008e02ea3 --- /dev/null +++ b/data/hfopenllm_v2/xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/1762652580.599936", + "retrieved_timestamp": "1762652580.599936", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", + "developer": "xkp24", + "inference_platform": "unknown", + "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6453188650558297 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4951075713814987 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.393875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3529753989361702 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json new file mode 100644 index 000000000..cddf6b143 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/1762652580.600162", + "retrieved_timestamp": "1762652580.600162", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.575601625908146 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4901206199104098 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09969788519637462 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36596874999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36585771276595747 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json new file mode 100644 index 000000000..d4b99303c --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1762652580.600485", + "retrieved_timestamp": "1762652580.6004858", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7034457461757027 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5091868512191421 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09667673716012085 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37390624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3692652925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json new file mode 100644 index 000000000..c3f6ae0e6 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/1762652580.6007009", + "retrieved_timestamp": "1762652580.6007009", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6023794642659255 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49695315361511977 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36736458333333327 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3657746010638298 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json new file mode 100644 index 000000000..f4aa246b6 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/1762652580.600907", + "retrieved_timestamp": "1762652580.600908", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6620300801872365 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49999369392208165 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38181249999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3614527925531915 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json new file mode 100644 index 000000000..b9e6a6c60 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/1762652580.601125", + "retrieved_timestamp": "1762652580.601126", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5336363072203975 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49148727192613517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09818731117824774 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37796874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3624501329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json new file mode 100644 index 000000000..e685fd11c --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/1762652580.601484", + "retrieved_timestamp": "1762652580.6014872", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6851609285584471 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.507516320435292 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07175226586102719 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25838926174496646 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831770833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3621176861702128 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json new file mode 100644 index 000000000..ef6a2116d --- /dev/null +++ b/data/hfopenllm_v2/xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/1762652580.601857", + "retrieved_timestamp": "1762652580.6018581", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5482242671666733 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.48871746894288526 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0891238670694864 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3632708333333334 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36710438829787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json b/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json new file mode 100644 index 000000000..f8f807671 --- /dev/null +++ b/data/hfopenllm_v2/xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/1762652580.602122", + "retrieved_timestamp": "1762652580.602124", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", + "developer": "xukp20", + "inference_platform": "unknown", + "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6900069593124022 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4978456981516493 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10498489425981873 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3673333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37159242021276595 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json b/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json new file mode 100644 index 000000000..b64040338 --- /dev/null +++ b/data/hfopenllm_v2/xwen-team/Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/xwen-team_Xwen-7B-Chat/1762652580.602432", + "retrieved_timestamp": "1762652580.602433", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "xwen-team/Xwen-7B-Chat", + "developer": "xwen-team", + "inference_platform": "unknown", + "id": "xwen-team/Xwen-7B-Chat" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6864098370102439 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.506762793166296 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4509063444108761 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2609060402684564 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3914270833333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42902260638297873 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 7.616 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json b/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json new file mode 100644 index 000000000..471d7c78f --- /dev/null +++ b/data/hfopenllm_v2/yam-peleg/Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Gemma-11B-Instruct/1762652580.603103", + "retrieved_timestamp": "1762652580.603105", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yam-peleg/Hebrew-Gemma-11B-Instruct", + "developer": "yam-peleg", + "inference_platform": "unknown", + "id": "yam-peleg/Hebrew-Gemma-11B-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30207737691547315 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40357843109818686 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06570996978851963 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.276006711409396 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4088541666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25540226063829785 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "GemmaForCausalLM", + "params_billions": 10.475 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json b/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json new file mode 100644 index 000000000..b9b8a3442 --- /dev/null +++ b/data/hfopenllm_v2/yanng1242/Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yanng1242_Marcoro14-7B-slerp/1762652580.604092", + "retrieved_timestamp": "1762652580.604092", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yanng1242/Marcoro14-7B-slerp", + "developer": "yanng1242", + "inference_platform": "unknown", + "id": "yanng1242/Marcoro14-7B-slerp" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4059916576904835 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5251655292981787 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07477341389728097 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3145973154362416 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.468625 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3168218085106383 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MistralForCausalLM", + "params_billions": 7.242 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json b/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json new file mode 100644 index 000000000..796c23299 --- /dev/null +++ b/data/hfopenllm_v2/yasserrmd/Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yasserrmd_Coder-GRPO-3B/1762652580.6044621", + "retrieved_timestamp": "1762652580.604463", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yasserrmd/Coder-GRPO-3B", + "developer": "yasserrmd", + "inference_platform": "unknown", + "id": "yasserrmd/Coder-GRPO-3B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6207640172520024 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4469120364616385 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3202416918429003 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.27768456375838924 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4114583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3197307180851064 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 3.086 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json b/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json new file mode 100644 index 000000000..cacc260f4 --- /dev/null +++ b/data/hfopenllm_v2/yasserrmd/Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yasserrmd_Text2SQL-1.5B/1762652580.604796", + "retrieved_timestamp": "1762652580.6047978", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yasserrmd/Text2SQL-1.5B", + "developer": "yasserrmd", + "inference_platform": "unknown", + "id": "yasserrmd/Text2SQL-1.5B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2857407235025289 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38577157961565695 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06797583081570997 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.287751677852349 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39423958333333337 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23628656914893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.544 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json new file mode 100644 index 000000000..3d96e595e --- /dev/null +++ b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605103", + "retrieved_timestamp": "1762652580.6051042", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ycros/BagelMIsteryTour-v2-8x7B", + "developer": "ycros", + "inference_platform": "unknown", + "id": "ycros/BagelMIsteryTour-v2-8x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.599431730031871 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.515923595752544 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.07854984894259819 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30453020134228187 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4202916666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.34732380319148937 + } + } + ], + "additional_details": { + "precision": "float16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json new file mode 100644 index 000000000..d99a0fb4c --- /dev/null +++ b/data/hfopenllm_v2/ycros/BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605396", + "retrieved_timestamp": "1762652580.605397", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ycros/BagelMIsteryTour-v2-8x7B", + "developer": "ycros", + "inference_platform": "unknown", + "id": "ycros/BagelMIsteryTour-v2-8x7B" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6262095683896506 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5141943573573103 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30788590604026844 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.41375 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3480718085106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 46.703 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json new file mode 100644 index 000000000..57e5e98f2 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/1762652580.605642", + "retrieved_timestamp": "1762652580.605643", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6708976626462231 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49866134349131935 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11178247734138973 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37269791666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37159242021276595 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json new file mode 100644 index 000000000..b800a22aa --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/1762652580.605978", + "retrieved_timestamp": "1762652580.605979", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7332710541363582 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5080359954971677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10347432024169184 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38060416666666663 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3748337765957447 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json new file mode 100644 index 000000000..ba825826d --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/1762652580.60626", + "retrieved_timestamp": "1762652580.606261", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6784664689690023 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49412091896520455 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.11253776435045318 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3646666666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37175864361702127 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json new file mode 100644 index 000000000..e2737ae23 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/1762652580.6064892", + "retrieved_timestamp": "1762652580.6064901", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7131876753680235 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5025359954971677 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09894259818731117 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3713333333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36826795212765956 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json new file mode 100644 index 000000000..d3a3cd532 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/1762652580.606723", + "retrieved_timestamp": "1762652580.606724", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6495653754260917 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4979459532536201 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10120845921450151 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37796874999999996 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37200797872340424 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json new file mode 100644 index 000000000..5098b588e --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/1762652580.6069329", + "retrieved_timestamp": "1762652580.606934", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7196073086078272 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5045147424411157 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08761329305135952 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2600671140939597 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3831458333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3734208776595745 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json new file mode 100644 index 000000000..571a219f6 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/1762652580.6072068", + "retrieved_timestamp": "1762652580.6072068", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6504397221594258 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49578758563187125 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.09365558912386707 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36603125 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3702626329787234 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json new file mode 100644 index 000000000..f53af5066 --- /dev/null +++ b/data/hfopenllm_v2/yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/1762652580.607418", + "retrieved_timestamp": "1762652580.6074188", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", + "developer": "yfzp", + "inference_platform": "unknown", + "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7015973173402128 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4991547169583548 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08685800604229607 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25922818791946306 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.37790624999999994 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.366938164893617 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json b/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json new file mode 100644 index 000000000..c8e6a260a --- /dev/null +++ b/data/hfopenllm_v2/yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/1762652580.6077929", + "retrieved_timestamp": "1762652580.607796", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", + "developer": "yifAI", + "inference_platform": "unknown", + "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6489658550423987 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.49145217071254876 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0755287009063444 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26174496644295303 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38987499999999997 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3519780585106383 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json b/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json new file mode 100644 index 000000000..88b695edb --- /dev/null +++ b/data/hfopenllm_v2/ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/1762652580.608171", + "retrieved_timestamp": "1762652580.608172", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", + "developer": "ylalain", + "inference_platform": "unknown", + "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.15052726764983576 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3975573100103517 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.004531722054380665 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28942953020134227 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3874583333333333 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.23836436170212766 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 1.357 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json new file mode 100644 index 000000000..aac2b57a3 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-GRPO-Instruct/1762652580.608475", + "retrieved_timestamp": "1762652580.608476", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/Llama-3.1-8B-GRPO-Instruct", + "developer": "ymcki", + "inference_platform": "unknown", + "id": "ymcki/Llama-3.1-8B-GRPO-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.744536718130117 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5131586337530801 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.20241691842900303 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.29446308724832215 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.38165625000000003 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3738364361702128 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json new file mode 100644 index 000000000..265d38f16 --- /dev/null +++ b/data/hfopenllm_v2/ymcki/Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/1762652580.608792", + "retrieved_timestamp": "1762652580.608793", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", + "developer": "ymcki", + "inference_platform": "unknown", + "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.33540007180946557 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3126261967336083 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.04003021148036254 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2533557046979866 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.35260416666666666 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10979055851063829 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json new file mode 100644 index 000000000..9e51af3a8 --- /dev/null +++ b/data/hfopenllm_v2/yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/1762652580.611586", + "retrieved_timestamp": "1762652580.611586", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", + "developer": "yuvraj17", + "inference_platform": "unknown", + "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4690897928607206 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4399870586095269 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.05664652567975831 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.30201342281879195 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40121875 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2634640957446808 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "LlamaForCausalLM", + "params_billions": 8.03 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json b/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json new file mode 100644 index 000000000..e4e908b21 --- /dev/null +++ b/data/hfopenllm_v2/zake7749/gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-2b-it-chinese-kyara-dpo/1762652580.612313", + "retrieved_timestamp": "1762652580.6123142", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", + "developer": "zake7749", + "inference_platform": "unknown", + "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5382075116247114 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4257464897414603 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.08383685800604229 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.26677852348993286 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.45756250000000004 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.25731382978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 2.614 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json b/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json new file mode 100644 index 000000000..9dab7a8f9 --- /dev/null +++ b/data/hfopenllm_v2/zelk12/Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054/1762652580.6282592", + "retrieved_timestamp": "1762652580.6282601", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zelk12/Test01012025155054", + "developer": "zelk12", + "inference_platform": "unknown", + "id": "zelk12/Test01012025155054" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.1555229014570229 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28295044895258115 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.0 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.24161073825503357 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.36702083333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.10904255319148937 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Gemma2ForCausalLM", + "params_billions": 3.817 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json new file mode 100644 index 000000000..5a7d2d3db --- /dev/null +++ b/data/hfopenllm_v2/zetasepic/Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/1762652580.6318998", + "retrieved_timestamp": "1762652580.631902", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", + "developer": "zetasepic", + "inference_platform": "unknown", + "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.8334131216283904 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.6934020817780425 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.595166163141994 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.3674496644295302 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.43542708333333335 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5621675531914894 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 32.764 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json b/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json new file mode 100644 index 000000000..a2ae860d1 --- /dev/null +++ b/data/hfopenllm_v2/zetasepic/Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-72B-Instruct-abliterated/1762652580.632342", + "retrieved_timestamp": "1762652580.632343", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zetasepic/Qwen2.5-72B-Instruct-abliterated", + "developer": "zetasepic", + "inference_platform": "unknown", + "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7152610628687439 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.7152257183282452 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5241691842900302 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.40687919463087246 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4719166666666667 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.5871841755319149 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "Qwen2ForCausalLM", + "params_billions": 72.706 + } +} \ No newline at end of file diff --git a/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json b/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json new file mode 100644 index 000000000..ea9229fd6 --- /dev/null +++ b/data/hfopenllm_v2/zhengr/MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json @@ -0,0 +1,107 @@ +{ + "schema_version": "0.0.1", + "evaluation_id": "hfopenllm_v2/zhengr_MixTAO-7Bx2-MoE-v8.1/1762652580.6327481", + "retrieved_timestamp": "1762652580.632749", + "source_data": [ + "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" + ], + "evaluation_source": { + "evaluation_source_name": "HF Open LLM v2", + "evaluation_source_type": "leaderboard" + }, + "source_metadata": { + "source_organization_name": "Hugging Face", + "evaluator_relationship": "third_party" + }, + "model_info": { + "name": "zhengr/MixTAO-7Bx2-MoE-v8.1", + "developer": "zhengr", + "inference_platform": "unknown", + "id": "zhengr/MixTAO-7Bx2-MoE-v8.1" + }, + "evaluation_results": [ + { + "evaluation_name": "IFEval", + "metric_config": { + "evaluation_description": "Accuracy on IFEval", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.4187810564856802 + } + }, + { + "evaluation_name": "BBH", + "metric_config": { + "evaluation_description": "Accuracy on BBH", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.42019437560239653 + } + }, + { + "evaluation_name": "MATH Level 5", + "metric_config": { + "evaluation_description": "Exact Match on MATH Level 5", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.06042296072507553 + } + }, + { + "evaluation_name": "GPQA", + "metric_config": { + "evaluation_description": "Accuracy on GPQA", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.2986577181208054 + } + }, + { + "evaluation_name": "MUSR", + "metric_config": { + "evaluation_description": "Accuracy on MUSR", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.39762499999999995 + } + }, + { + "evaluation_name": "MMLU-PRO", + "metric_config": { + "evaluation_description": "Accuracy on MMLU-PRO", + "lower_is_better": false, + "score_type": "continuous", + "min_score": 0, + "max_score": 1 + }, + "score_details": { + "score": 0.28465757978723405 + } + } + ], + "additional_details": { + "precision": "bfloat16", + "architecture": "MixtralForCausalLM", + "params_billions": 12.879 + } +} \ No newline at end of file